intel/perf: reorder xml files
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 18 Nov 2020 10:43:42 +0000 (12:43 +0200)
committer Marge Bot <eric+marge@anholt.net>
Tue, 2 Feb 2021 13:25:55 +0000 (13:25 +0000)
Make the files match the order of the ones from IGT (which have changed
because of the python2->3 transition).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6518>

15 files changed:
src/intel/perf/oa-bdw.xml
src/intel/perf/oa-bxt.xml
src/intel/perf/oa-cflgt2.xml
src/intel/perf/oa-cflgt3.xml
src/intel/perf/oa-chv.xml
src/intel/perf/oa-ehl.xml
src/intel/perf/oa-glk.xml
src/intel/perf/oa-hsw.xml
src/intel/perf/oa-icl.xml
src/intel/perf/oa-kblgt2.xml
src/intel/perf/oa-kblgt3.xml
src/intel/perf/oa-sklgt2.xml
src/intel/perf/oa-sklgt3.xml
src/intel/perf/oa-sklgt4.xml
src/intel/perf/oa-tgl.xml

index 714a1f0..71b8d10 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1525360313" merge_md5="">
   <set name="Render Metrics Basic Gen8"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="b541bd57-0e0f-4154-b4c0-5858010a2bf7"
        chipset="BDW"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b541bd57-0e0f-4154-b4c0-5858010a2bf7"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ B 5 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="B 4 READ B 5 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
-             units="messages"
-             symbol_name="L3Lookups"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
              description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
              mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA"
                      availability="$SliceMask 0x01 AND"
   </set>
 
   <set name="Compute Metrics Basic Gen8"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="35fbc9b2-a891-40a6-a38d-022bb7057552"
        chipset="BDW"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="35fbc9b2-a891-40a6-a38d-022bb7057552"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of typed memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA"
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="233d0544-fff7-4281-8291-e02f222aff72"
        chipset="BDW"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="233d0544-fff7-4281-8291-e02f222aff72"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages sent to samplers."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
-             units="percent"
-             symbol_name="SfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Bottleneck"
-             low_watermark="3"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
              description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="3"
              high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Memory Reads Distribution Gen8"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="2b255d48-2117-4fef-a8f7-f151e1d25a2c"
        chipset="BDW"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2b255d48-2117-4fef-a8f7-f151e1d25a2c"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
              units="messages"
-             symbol_name="GtiRccMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Memory Writes Distribution Gen8"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="f7fd3220-b466-4a4d-9f98-b0caf3f2394c"
        chipset="BDW"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f7fd3220-b466-4a4d-9f98-b0caf3f2394c"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
-             data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
     <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
              description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Compute Metrics Extended Gen8"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="e99ccaca-821c-4df9-97a7-96bdb7204e43"
        chipset="BDW"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="e99ccaca-821c-4df9-97a7-96bdb7204e43"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
              units="messages"
-             symbol_name="EuTypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="Ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu atomic requests to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuUntypedReads0"
-             description="The subslice 0 EU Untyped Reads subslice 0."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
+             description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedAtomics0"
-             description="The subslice 0 EU Untyped Atomics subslice 0."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
              description="The subslice 0 EU Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedWrites0"
-             description="The subslice 0 EU A64 Untyped Writes subslice 0."
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="UntypedWritesPerCacheLine"
-             description="Ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu requests to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
+             description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
+             description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 2 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="Ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu requests to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
              units="messages"
-             symbol_name="TypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
              description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 1 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
              units="messages"
-             symbol_name="EuTypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedReadsPerCacheLine"
-             description="Ratio of EU untyped read requests to L3 cache line reads."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="Ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu requests to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="Ratio of EU typed write requests to L3 cache line writes."
+             data_type="float"
+             units="eu requests to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="Ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
+             description="Ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu requests to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="Ratio of EU untyped write requests to L3 cache line writes."
+             data_type="float"
+             units="eu requests to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="Ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu atomic requests to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA"
                      availability="$SubsliceMask 0x01 AND"
   </set>
 
   <set name="Compute Metrics L3 Cache Gen8"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="27a364dc-8225-4ecb-b607-d6f1925598d9"
        chipset="BDW"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="27a364dc-8225-4ecb-b607-d6f1925598d9"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ C 2 READ C 3 READ B 6 READ B 7 READ UADD UADD UADD UADD UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OCL OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OCL OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OCL OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OCL OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
-             data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
-             data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
              />
     <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
              description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
              units="percent"
-             symbol_name="EuMoveFpu0Instruction"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="L3 Bank 10 Accesses"
-             description="The total number of accesses to L3 Bank 10."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="C 2 READ 2 UMUL"
-             underscore_name="l3_bank10_accesses"
-             units="messages"
-             symbol_name="L3Bank10Accesses"
-             availability="$SliceMask 0x02 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 10 IC Accesses"
-             description="The total number of accesses to L3 Bank 10 from IC cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="B 4 READ B 5 READ UADD 2 UMUL $L3Bank10Accesses UMIN"
-             underscore_name="l3_bank10_ic_accesses"
              units="messages"
-             symbol_name="L3Bank10IcAccesses"
-             availability="$SliceMask 0x02 AND"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 10 IC Hits"
-             description="The total number of hits in L3 Bank 10 from IC cache."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="B 5 READ 2 UMUL $L3Bank10IcAccesses UMIN"
-             underscore_name="l3_bank10_ic_hits"
              units="messages"
-             symbol_name="L3Bank10IcHits"
-             availability="$SliceMask 0x02 AND"
              semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ C 2 READ C 3 READ B 6 READ B 7 READ UADD UADD UADD UADD UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
              units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
              />
     <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
              description="The total number of accesses to L3 Bank 01."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
+             description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 10 Accesses"
+             symbol_name="L3Bank10Accesses"
+             underscore_name="l3_bank10_accesses"
+             description="The total number of accesses to L3 Bank 10."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 2 READ 2 UMUL"
+             availability="$SliceMask 0x02 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 11 Accesses"
+             symbol_name="L3Bank11Accesses"
+             underscore_name="l3_bank11_accesses"
              description="The total number of accesses to L3 Bank 11."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="l3_bank11_accesses"
              units="messages"
-             symbol_name="L3Bank11Accesses"
-             availability="$SliceMask 0x02 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="C 3 READ 2 UMUL"
+             availability="$SliceMask 0x02 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
              />
-    <counter name="L3 Bank 02 Accesses"
-             description="The total number of accesses to L3 Bank 02."
+    <counter name="L3 Bank 12 Accesses"
+             symbol_name="L3Bank12Accesses"
+             underscore_name="l3_bank12_accesses"
+             description="The total number of accesses to L3 Bank 12."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 6 READ 2 UMUL"
+             availability="$SliceMask 0x02 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
              />
     <counter name="L3 Bank 13 Accesses"
+             symbol_name="L3Bank13Accesses"
+             underscore_name="l3_bank13_accesses"
              description="The total number of accesses to L3 Bank 13."
              data_type="uint64"
-             equation="B 7 READ 2 UMUL"
-             underscore_name="l3_bank13_accesses"
              units="messages"
-             symbol_name="L3Bank13Accesses"
-             availability="$SliceMask 0x02 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 7 READ 2 UMUL"
+             availability="$SliceMask 0x02 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="L3 Bank 10 IC Accesses"
+             symbol_name="L3Bank10IcAccesses"
+             underscore_name="l3_bank10_ic_accesses"
+             description="The total number of accesses to L3 Bank 10 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
-             units="bytes"
-             symbol_name="GtiWriteThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 4 READ B 5 READ UADD 2 UMUL $L3Bank10Accesses UMIN"
+             availability="$SliceMask 0x02 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Bank 10 IC Hits"
+             symbol_name="L3Bank10IcHits"
+             underscore_name="l3_bank10_ic_hits"
+             description="The total number of hits in L3 Bank 10 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ 2 UMUL $L3Bank10IcAccesses UMIN"
+             availability="$SliceMask 0x02 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="L3 Bank 12 Accesses"
-             description="The total number of accesses to L3 Bank 12."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="B 6 READ 2 UMUL"
-             underscore_name="l3_bank12_accesses"
-             units="messages"
-             symbol_name="L3Bank12Accesses"
-             availability="$SliceMask 0x02 AND"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Data Port Reads Coalescing Gen8"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="data_port_reads_coalescing"
-       hw_config_guid="857fc630-2f09-4804-85f1-084adfadd5ab"
        chipset="BDW"
        symbol_name="DataPortReadsCoalescing"
+       underscore_name="data_port_reads_coalescing"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="857fc630-2f09-4804-85f1-084adfadd5ab"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
-             units="percent"
-             symbol_name="Fpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
-             units="percent"
-             symbol_name="Fpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU AVG IPC Rate"
-             description="The average rate of IPC calculated for 2 FPU pipelines."
-             data_type="float"
-             max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
-             units="number"
-             symbol_name="EuAvgIpcRate"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
+             description="The average rate of IPC calculated for 2 FPU pipelines."
+             data_type="float"
+             max_equation="2"
+             units="number"
+             semantic_type="ratio"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU to Data Port 0 Reads 64"
-             description="The subslice 0 EU data reads from Data Port with 64B per message."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_hdc0_reads64_b"
              units="messages"
-             symbol_name="EuHdc0Reads64B"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Data Port 0 to L3 Data Reads"
-             description="The subslice 0 Data Port data and constant reads from L3 cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="hdc0_l3_data_reads"
              units="messages"
-             symbol_name="Hdc0L3DataReads"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Data Port 0 to L3 Data Writes"
-             description="The subslice 0 Data Port data writes to L3 cache."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="hdc0_l3_data_writes"
              units="messages"
-             symbol_name="Hdc0L3DataWrites"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU to Data Port 0 Reads 32"
+             symbol_name="EuHdc0Reads32B"
+             underscore_name="eu_hdc0_reads32_b"
+             description="The subslice 0 EU data reads from Data Port with 32B per message."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EU to Data Port 0 Reads 64"
+             symbol_name="EuHdc0Reads64B"
+             underscore_name="eu_hdc0_reads64_b"
+             description="The subslice 0 EU data reads from Data Port with 64B per message."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EU to Data Port 0 Reads 128"
+             symbol_name="EuHdc0Reads128B"
+             underscore_name="eu_hdc0_reads128_b"
              description="The subslice 0 EU data reads from Data Port with 128B per message."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_hdc0_reads128_b"
              units="messages"
-             symbol_name="EuHdc0Reads128B"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="EU to Data Port 0 Reads 256"
+             symbol_name="EuHdc0Reads256B"
+             underscore_name="eu_hdc0_reads256_b"
+             description="The subslice 0 EU data reads from Data Port with 256B per message."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             equation="B 7 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="All Data Port 0 Writes to L3"
-             description="The subslice 0 Data Port writes to L3 cache."
+    <counter name="Data Port 0 to L3 Data Reads"
+             symbol_name="Hdc0L3DataReads"
+             underscore_name="hdc0_l3_data_reads"
+             description="The subslice 0 Data Port data and constant reads from L3 cache."
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="hdc0_l3_writes"
              units="messages"
-             symbol_name="Hdc0L3Writes"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Data Port 0 to L3 Data Writes"
+             symbol_name="Hdc0L3DataWrites"
+             underscore_name="hdc0_l3_data_writes"
+             description="The subslice 0 Data Port data writes to L3 cache."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU to Data Port 0 Reads 32"
-             description="The subslice 0 EU data reads from Data Port with 32B per message."
+    <counter name="All Data Port 0 Reads from L3"
+             symbol_name="Hdc0L3Reads"
+             underscore_name="hdc0_l3_reads"
+             description="The subslice 0 Data Port reads from L3 cache."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_hdc0_reads32_b"
              units="messages"
-             symbol_name="EuHdc0Reads32B"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ C 2 READ USUB"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU to Data Port 0 Reads 256"
-             description="The subslice 0 EU data reads from Data Port with 256B per message."
+    <counter name="All Data Port 0 Writes to L3"
+             symbol_name="Hdc0L3Writes"
+             underscore_name="hdc0_l3_writes"
+             description="The subslice 0 Data Port writes to L3 cache."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_hdc0_reads256_b"
              units="messages"
-             symbol_name="EuHdc0Reads256B"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="EuBytesReadPerCacheLine"
+             symbol_name="EuBytesReadPerCacheLine"
+             underscore_name="eu_bytes_read_per_cache_line"
              description="Average EU bytes read per L3 cache line."
              data_type="float"
-             equation="$EuHdc0Reads32B 32 UMUL $EuHdc0Reads64B 64 UMUL $EuHdc0Reads128B 128 UMUL $EuHdc0Reads256B 256 UMUL UADD UADD UADD $Hdc0L3DataReads FDIV"
-             underscore_name="eu_bytes_read_per_cache_line"
              units="eu bytes per l3 cache line"
-             symbol_name="EuBytesReadPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuHdc0Reads32B 32 UMUL $EuHdc0Reads64B 64 UMUL $EuHdc0Reads128B 128 UMUL $EuHdc0Reads256B 256 UMUL UADD UADD UADD $Hdc0L3DataReads FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="EuDataReadsPerCacheLine"
+             symbol_name="EuDataReadsPerCacheLine"
+             underscore_name="eu_data_reads_per_cache_line"
              description="Coalescing ratio of EU read requests to L3 cache lines."
              data_type="float"
-             equation="$EuBytesReadPerCacheLine 64 FDIV"
-             underscore_name="eu_data_reads_per_cache_line"
              units="utilization"
-             symbol_name="EuDataReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuBytesReadPerCacheLine 64 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="All Data Port 0 Reads from L3"
-             description="The subslice 0 Data Port reads from L3 cache."
-             data_type="uint64"
-             equation="C 3 READ C 2 READ USUB"
-             underscore_name="hdc0_l3_reads"
-             units="messages"
-             symbol_name="Hdc0L3Reads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA"
                      availability="$SubsliceMask 0x01 AND"
   </set>
 
   <set name="Data Port Writes Coalescing Gen8"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="data_port_writes_coalescing"
-       hw_config_guid="343ebc99-4a55-414c-8c17-d8e259cf5e20"
        chipset="BDW"
        symbol_name="DataPortWritesCoalescing"
+       underscore_name="data_port_writes_coalescing"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="343ebc99-4a55-414c-8c17-d8e259cf5e20"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU to Data Port 0 Writes 128"
-             description="The subslice 0 EU data simd16 writes to Data Port with 192B per message."
-             data_type="uint64"
-             equation="B 6 READ 2 UDIV"
-             underscore_name="eu_hdc0_writes192_b"
-             units="messages"
-             symbol_name="EuHdc0Writes192B"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
     <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
              description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
              units="texels"
-             symbol_name="SamplerTexels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="EU to Data Port 0 Writes 32B"
-             description="The subslice 0 EU data writes to Data Port with 32B per message."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_hdc0_writes32_b"
-             units="messages"
-             symbol_name="EuHdc0Writes32B"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU to Data Port 0 Writes 256B"
-             description="The subslice 0 EU data simd16 writes to Data Port with 256B per message."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 7 READ 2 UDIV"
-             underscore_name="eu_hdc0_writes256_b_simd16"
              units="messages"
-             symbol_name="EuHdc0Writes256BSimd16"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Data Port 0 to L3 Data Reads"
-             description="The subslice 0 Data Port data and constant reads from L3 cache."
+    <counter name="EU to Data Port 0 Writes 32B"
+             symbol_name="EuHdc0Writes32B"
+             underscore_name="eu_hdc0_writes32_b"
+             description="The subslice 0 EU data writes to Data Port with 32B per message."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="hdc0_l3_data_reads"
              units="messages"
-             symbol_name="Hdc0L3DataReads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Data Port 0 to L3 Data Writes"
-             description="The subslice 0 Data Port data writes to L3 cache."
+    <counter name="EU to Data Port 0 Writes 64B"
+             symbol_name="EuHdc0Writes64B"
+             underscore_name="eu_hdc0_writes64_b"
+             description="The subslice 0 EU data writes to Data Port with 64B per message."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="hdc0_l3_data_writes"
              units="messages"
-             symbol_name="Hdc0L3DataWrites"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ B 4 READ UADD"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="EU to Data Port 0 Writes 64B"
-             description="The subslice 0 EU data simd16 writes to Data Port with 128B per message."
+             symbol_name="EuHdc0Writes96B"
+             underscore_name="eu_hdc0_writes96_b"
+             description="The subslice 0 EU data writes to Data Port with 96B per message."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_hdc0_writes128_b_simd16"
              units="messages"
-             symbol_name="EuHdc0Writes128BSimd16"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ 2 UDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU to Data Port 0 Writes 64B"
-             description="The subslice 0 EU data writes to Data Port with 64B per message."
+    <counter name="EU to Data Port 0 Writes 128B"
+             symbol_name="EuHdc0Writes128B"
+             underscore_name="eu_hdc0_writes128_b"
+             description="The subslice 0 EU data writes to Data Port with 128B per message."
              data_type="uint64"
-             equation="B 2 READ 2 UDIV"
-             underscore_name="eu_hdc0_writes96_b"
              units="messages"
-             symbol_name="EuHdc0Writes96B"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ 2 UDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="All Data Port 0 Writes to L3"
-             description="The subslice 0 Data Port writes to L3 cache."
+    <counter name="EU to Data Port 0 Writes 192B"
+             symbol_name="EuHdc0Writes192B"
+             underscore_name="eu_hdc0_writes192_b"
+             description="The subslice 0 EU data simd16 writes to Data Port with 192B per message."
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="hdc0_l3_writes"
              units="messages"
-             symbol_name="Hdc0L3Writes"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 6 READ 2 UDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EU to Data Port 0 Writes 64B"
-             description="The subslice 0 EU data writes to Data Port with 64B per message."
+             symbol_name="EuHdc0Writes128BSimd16"
+             underscore_name="eu_hdc0_writes128_b_simd16"
+             description="The subslice 0 EU data simd16 writes to Data Port with 128B per message."
              data_type="uint64"
-             equation="B 1 READ B 4 READ UADD"
-             underscore_name="eu_hdc0_writes64_b"
              units="messages"
-             symbol_name="EuHdc0Writes64B"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU to Data Port 0 Writes 128"
-             description="The subslice 0 EU data writes to Data Port with 128B per message."
+    <counter name="EU to Data Port 0 Writes 256B"
+             symbol_name="EuHdc0Writes256BSimd16"
+             underscore_name="eu_hdc0_writes256_b_simd16"
+             description="The subslice 0 EU data simd16 writes to Data Port with 256B per message."
              data_type="uint64"
-             equation="B 3 READ 2 UDIV"
-             underscore_name="eu_hdc0_writes128_b"
              units="messages"
-             symbol_name="EuHdc0Writes128B"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="EuBytesWrittenPerCacheLine"
-             description="Average EU bytes written per L3 cache line."
-             data_type="float"
-             equation="$EuHdc0Writes32B 32 UMUL $EuHdc0Writes64B 64 UMUL $EuHdc0Writes96B 96 UMUL $EuHdc0Writes128B 128 UMUL $EuHdc0Writes128BSimd16 128 UMUL $EuHdc0Writes256BSimd16 256 UMUL $EuHdc0Writes192B 192 UMUL UADD UADD UADD UADD UADD UADD $Hdc0L3DataWrites FDIV"
-             underscore_name="eu_bytes_written_per_cache_line"
-             units="eu bytes per l3 cache line"
-             symbol_name="EuBytesWrittenPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 7 READ 2 UDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="EuDataWritesPerCacheLine"
-             description="Coalescing ratio of EU write requests to L3 cache lines."
-             data_type="float"
-             equation="$EuBytesWrittenPerCacheLine 64 FDIV"
-             underscore_name="eu_data_writes_per_cache_line"
-             units="utilization"
-             symbol_name="EuDataWritesPerCacheLine"
-             semantic_type="ratio"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Data Port 0 to L3 Data Reads"
+             symbol_name="Hdc0L3DataReads"
+             underscore_name="hdc0_l3_data_reads"
+             description="The subslice 0 Data Port data and constant reads from L3 cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="C 4 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Data Port 0 to L3 Data Writes"
+             symbol_name="Hdc0L3DataWrites"
+             underscore_name="hdc0_l3_data_writes"
+             description="The subslice 0 Data Port data writes to L3 cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL 2 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="All Data Port 0 Reads from L3"
+             symbol_name="Hdc0L3Reads"
+             underscore_name="hdc0_l3_reads"
              description="The subslice 0 Data Port reads from L3 cache."
              data_type="uint64"
-             equation="C 3 READ C 2 READ USUB"
-             underscore_name="hdc0_l3_reads"
              units="messages"
-             symbol_name="Hdc0L3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ C 2 READ USUB"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="All Data Port 0 Writes to L3"
+             symbol_name="Hdc0L3Writes"
+             underscore_name="hdc0_l3_writes"
+             description="The subslice 0 Data Port writes to L3 cache."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="EuBytesWrittenPerCacheLine"
+             symbol_name="EuBytesWrittenPerCacheLine"
+             underscore_name="eu_bytes_written_per_cache_line"
+             description="Average EU bytes written per L3 cache line."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu bytes per l3 cache line"
+             semantic_type="ratio"
+             equation="$EuHdc0Writes32B 32 UMUL $EuHdc0Writes64B 64 UMUL $EuHdc0Writes96B 96 UMUL $EuHdc0Writes128B 128 UMUL $EuHdc0Writes128BSimd16 128 UMUL $EuHdc0Writes256BSimd16 256 UMUL $EuHdc0Writes192B 192 UMUL UADD UADD UADD UADD UADD UADD $Hdc0L3DataWrites FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuDataWritesPerCacheLine"
+             symbol_name="EuDataWritesPerCacheLine"
+             underscore_name="eu_data_writes_per_cache_line"
+             description="Coalescing ratio of EU write requests to L3 cache lines."
+             data_type="float"
+             units="utilization"
+             semantic_type="ratio"
+             equation="$EuBytesWrittenPerCacheLine 64 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA"
                      availability="$SubsliceMask 0x01 AND"
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="7bdafd88-a4fa-4ed5-bc09-1a977aa5be3e"
        chipset="BDW"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="7bdafd88-a4fa-4ed5-bc09-1a977aa5be3e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ C 0 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader12_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader12AccessStalledOnL3"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HDC stalled by L3 (s0.ss0)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ B 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader10_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader10AccessStalledOnL3"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 21 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ B 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader11_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader11AccessStalledOnL3"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s1.ss2)"
+             symbol_name="NonSamplerShader12AccessStalledOnL3"
+             underscore_name="non_sampler_shader12_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s1.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ C 0 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s1.ss1)"
+             symbol_name="NonSamplerShader11AccessStalledOnL3"
+             underscore_name="non_sampler_shader11_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s1.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ B 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+    <counter name="HDC stalled by L3 (s1.ss0)"
+             symbol_name="NonSamplerShader10AccessStalledOnL3"
+             underscore_name="non_sampler_shader10_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s1.ss0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ B 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="9385ebb2-f34f-4aa5-aec5-7e9cbbea0f0b"
        chipset="BDW"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="9385ebb2-f34f-4aa5-aec5-7e9cbbea0f0b"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank1 Stalled"
-             description="The percentage of time in which slice1 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank1_stalled"
-             units="percent"
-             symbol_name="L31Bank1Stalled"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Slice1 L3 Bank0 Stalled"
-             description="The percentage of time in which slice1 L3 bank0 is stalled"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank0_stalled"
              units="percent"
-             symbol_name="L31Bank0Stalled"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice1 L3 Bank1 Active"
-             description="The percentage of time in which slice1 L3 bank1 is active"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank1_active"
              units="percent"
-             symbol_name="L31Bank1Active"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank0 Active"
-             description="The percentage of time in which slice1 L3 bank0 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank0_active"
-             units="percent"
-             symbol_name="L31Bank0Active"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice1 L3 Bank0 Stalled"
+             symbol_name="L31Bank0Stalled"
+             underscore_name="l31_bank0_stalled"
+             description="The percentage of time in which slice1 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank1 Stalled"
+             symbol_name="L31Bank1Stalled"
+             underscore_name="l31_bank1_stalled"
+             description="The percentage of time in which slice1 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank1 Active"
+             symbol_name="L31Bank1Active"
+             underscore_name="l31_bank1_active"
+             description="The percentage of time in which slice1 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank0 Active"
+             symbol_name="L31Bank0Active"
+             underscore_name="l31_bank0_active"
+             description="The percentage of time in which slice1 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="446ae59b-ff2e-41c9-b49e-0184a54bf00a"
        chipset="BDW"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="446ae59b-ff2e-41c9-b49e-0184a54bf00a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
              units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="84a7956f-1ea4-4d0d-837f-e39a0376e38c"
        chipset="BDW"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="84a7956f-1ea4-4d0d-837f-e39a0376e38c"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank3 Active"
-             description="The percentage of time in which slice1 L3 bank3 is active"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank3_active"
              units="percent"
-             symbol_name="L31Bank3Active"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank3 Stalled"
-             description="The percentage of time in which slice1 L3 bank3 is stalled"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank3_stalled"
              units="percent"
-             symbol_name="L31Bank3Stalled"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
              units="pixels"
-             symbol_name="SamplesKilledInPs"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
              symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank3 Stalled"
+             symbol_name="L31Bank3Stalled"
+             underscore_name="l31_bank3_stalled"
+             description="The percentage of time in which slice1 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank3 Active"
+             symbol_name="L31Bank3Active"
+             underscore_name="l31_bank3_active"
+             description="The percentage of time in which slice1 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_4"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_4"
-       hw_config_guid="92b493d9-df18-4bed-be06-5cac6f2a6f5f"
        chipset="BDW"
        symbol_name="L3_4"
+       underscore_name="l3_4"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="92b493d9-df18-4bed-be06-5cac6f2a6f5f"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank2 Active"
-             description="The percentage of time in which slice1 L3 bank2 is active"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank2_active"
              units="percent"
-             symbol_name="L31Bank2Active"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice1 L3 Bank2 Stalled"
-             description="The percentage of time in which slice1 L3 bank2 is stalled"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank2_stalled"
              units="percent"
-             symbol_name="L31Bank2Stalled"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank2 Stalled"
+             symbol_name="L31Bank2Stalled"
+             underscore_name="l31_bank2_stalled"
+             description="The percentage of time in which slice1 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank2 Active"
+             symbol_name="L31Bank2Active"
+             underscore_name="l31_bank2_active"
+             description="The percentage of time in which slice1 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="14345c35-cc46-40d0-bb04-6ed1fbb43679"
        chipset="BDW"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="14345c35-cc46-40d0-bb04-6ed1fbb43679"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Rasterizer Input Available"
-             description="The percentage of time in which slice1 rasterizer input is available"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer1_input_available"
              units="percent"
-             symbol_name="Rasterizer1InputAvailable"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Pixel Values Ready"
-             description="The percentage of time in which slice1 pixel values are ready"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values1_ready"
              units="percent"
-             symbol_name="PixelValues1Ready"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Slice1 PS Output Available"
-             description="The percentage of time in which slice1 PS output is available"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output1_available"
              units="percent"
-             symbol_name="PSOutput1Available"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice1 Rasterizer Output Ready"
-             description="The percentage of time in which slice1 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer1_output_ready"
-             units="percent"
-             symbol_name="Rasterizer1OutputReady"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data1_ready"
-             units="percent"
-             symbol_name="PixelData1Ready"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData1Ready"
+             underscore_name="pixel_data1_ready"
+             description="The percentage of time in which slice1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 Rasterizer Input Available"
+             symbol_name="Rasterizer1InputAvailable"
+             underscore_name="rasterizer1_input_available"
+             description="The percentage of time in which slice1 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 Rasterizer Output Ready"
+             symbol_name="Rasterizer1OutputReady"
+             underscore_name="rasterizer1_output_ready"
+             description="The percentage of time in which slice1 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+    <counter name="Slice1 Pixel Values Ready"
+             symbol_name="PixelValues1Ready"
+             underscore_name="pixel_values1_ready"
+             description="The percentage of time in which slice1 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 PS Output Available"
+             symbol_name="PSOutput1Available"
+             underscore_name="ps_output1_available"
+             description="The percentage of time in which slice1 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set Sampler_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_1"
-       hw_config_guid="f0c6ba37-d3d3-4211-91b5-226730312a54"
        chipset="BDW"
        symbol_name="Sampler_1"
+       underscore_name="sampler_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f0c6ba37-d3d3-4211-91b5-226730312a54"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Slice1 Subslice0 Input Available"
-             description="The percentage of time in which slice1 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler10_input_available"
-             units="percent"
-             symbol_name="Sampler10InputAvailable"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice1 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice1 subslice2 sampler output is ready"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler12_output_ready"
              units="percent"
-             symbol_name="Sampler12OutputReady"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Subslice1 Input Available"
-             description="The percentage of time in which slice1 subslice1 sampler input is available"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler11_input_available"
              units="percent"
-             symbol_name="Sampler11InputAvailable"
-             availability="$SubsliceMask 0x10 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice1 Subslice2 Input Available"
-             description="The percentage of time in which slice1 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler12_input_available"
-             units="percent"
-             symbol_name="Sampler12InputAvailable"
-             availability="$SubsliceMask 0x20 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice1 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice1 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler10_output_ready"
-             units="percent"
-             symbol_name="Sampler10OutputReady"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Slice1 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice1 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler11_output_ready"
-             units="percent"
-             symbol_name="Sampler11OutputReady"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice1 Subslice1 Input Available"
+             symbol_name="Sampler11InputAvailable"
+             underscore_name="sampler11_input_available"
+             description="The percentage of time in which slice1 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice2 Input Available"
+             symbol_name="Sampler12InputAvailable"
+             underscore_name="sampler12_input_available"
+             description="The percentage of time in which slice1 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice0 Input Available"
+             symbol_name="Sampler10InputAvailable"
+             underscore_name="sampler10_input_available"
+             description="The percentage of time in which slice1 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler12OutputReady"
+             underscore_name="sampler12_output_ready"
+             description="The percentage of time in which slice1 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler10OutputReady"
+             underscore_name="sampler10_output_ready"
+             description="The percentage of time in which slice1 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler11OutputReady"
+             underscore_name="sampler11_output_ready"
+             description="The percentage of time in which slice1 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set Sampler_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_2"
-       hw_config_guid="30bf3702-48cf-4bca-b412-7cf50bb2f564"
        chipset="BDW"
        symbol_name="Sampler_2"
+       underscore_name="sampler_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="30bf3702-48cf-4bca-b412-7cf50bb2f564"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="238bec85-df05-44f3-b905-d166712f2451"
        chipset="BDW"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="238bec85-df05-44f3-b905-d166712f2451"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 22 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread11_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread11ReadyForDispatch"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread12_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread12ReadyForDispatch"
-             availability="$SubsliceMask 0x20 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread10_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread10ReadyForDispatch"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice1"
+             symbol_name="NonPSThread11ReadyForDispatch"
+             underscore_name="non_ps_thread11_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice0"
+             symbol_name="PSThread10ReadyForDispatch"
+             underscore_name="ps_thread10_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice0"
+             symbol_name="NonPSThread10ReadyForDispatch"
+             underscore_name="non_ps_thread10_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice2"
+             symbol_name="PSThread12ReadyForDispatch"
+             underscore_name="ps_thread12_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice2"
+             symbol_name="NonPSThread12ReadyForDispatch"
+             underscore_name="non_ps_thread12_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice1"
+             symbol_name="PSThread11ReadyForDispatch"
+             underscore_name="ps_thread11_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread12_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread12ReadyForDispatch"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread10_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread10ReadyForDispatch"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice1"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread11_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread11ReadyForDispatch"
-             availability="$SubsliceMask 0x10 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="24bf02cd-8693-4583-981c-c4165b33da01"
        chipset="BDW"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="24bf02cd-8693-4583-981c-c4165b33da01"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 1"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header11_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader11ReadyPort1"
-             availability="$SubsliceMask 0x10 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header11_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader11ReadyPort0"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 0"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header12_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader12ReadyPort0"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice0 Port 1"
-             description="The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 1"
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header10_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader10ReadyPort1"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="Thread Header Ready on Slice1 Subslice2 Port 0"
+             symbol_name="ThreadHeader12ReadyPort0"
+             underscore_name="thread_header12_ready_port0"
+             description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice1 Subslice2 Port 1"
+             symbol_name="ThreadHeader12ReadyPort1"
+             underscore_name="thread_header12_ready_port1"
              description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header12_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader12ReadyPort1"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+    <counter name="Thread Header Ready on Slice1 Subslice1 Port 1"
+             symbol_name="ThreadHeader11ReadyPort1"
+             underscore_name="thread_header11_ready_port1"
+             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
     <counter name="Thread Header Ready on Slice1 Subslice0 Port 0"
+             symbol_name="ThreadHeader10ReadyPort0"
+             underscore_name="thread_header10_ready_port0"
              description="The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header10_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader10ReadyPort0"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice1 Subslice0 Port 1"
+             symbol_name="ThreadHeader10ReadyPort1"
+             underscore_name="thread_header10_ready_port1"
+             description="The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="Thread Header Ready on Slice1 Subslice1 Port 0"
+             symbol_name="ThreadHeader11ReadyPort0"
+             underscore_name="thread_header11_ready_port0"
+             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen8"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="8fb61ba2-2fbb-454c-a136-2dec5a8a595e"
        chipset="BDW"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="8fb61ba2-2fbb-454c-a136-2dec5a8a595e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 System Frame Batch Draw"
+             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Media Vme Pipe Gen8"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="e1743ca0-7fc8-410b-a066-de7bbb9280b7"
        chipset="BDW"
        symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="e1743ca0-7fc8-410b-a066-de7bbb9280b7"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="0a9eb7be-feee-4275-a139-6d9cedf0fdb0"
        chipset="BDW"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="0a9eb7be-feee-4275-a139-6d9cedf0fdb0"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AnyRingBusy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
+             description="The percentage of time when any command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen8"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="d6de6f55-e526-4f79-a6a6-d7315c09044e"
        chipset="BDW"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d6de6f55-e526-4f79-a6a6-d7315c09044e"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="B 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
              description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
              description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
              description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="B 6 READ"
              mdapi_group="GPU"
-             />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
-             units="events"
-             symbol_name="Counter3"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
-             data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
-             units="events"
-             symbol_name="Counter0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 IO BB"
-       underscore_name="stc__pma_stall"
-       hw_config_guid="e713f347-953e-4d8c-b02f-6be31df2db2b"
        chipset="BDW"
        symbol_name="STC_PmaStall"
+       underscore_name="stc__pma_stall"
+       mdapi_supported_apis="OGL OGL4 IO BB"
+       hw_config_guid="e713f347-953e-4d8c-b02f-6be31df2db2b"
        >
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="STC PMA stall"
-             description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GPU/Stencil Cache"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
              description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
+             description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Stencil Cache"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index db018a3..ed7a3d9 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1522878593" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="22b9519a-e9ba-4c41-8b54-f4f8ca14fa0a"
        chipset="BXT"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="22b9519a-e9ba-4c41-8b54-f4f8ca14fa0a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
+             equation="B 4 READ 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
              description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
              mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA"
                      availability="$SkuRevisionId 0x03 UGTE"
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="012d72cf-82a9-4d25-8ddf-74076fd30797"
        chipset="BXT"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="012d72cf-82a9-4d25-8ddf-74076fd30797"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="ce416533-e49e-4211-80af-ec513590a914"
        chipset="BXT"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="ce416533-e49e-4211-80af-ec513590a914"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="VsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             high_watermark="10"
              data_type="float"
-             high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull shader pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="398e2452-18d7-42d0-b241-e4d0a9148ada"
        chipset="BXT"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="398e2452-18d7-42d0-b241-e4d0a9148ada"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
-             semantic_type="event"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
              description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
              mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="d324a0d6-7269-4847-a5c2-6f71ddc7fed5"
        chipset="BXT"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d324a0d6-7269-4847-a5c2-6f71ddc7fed5"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="caf3596a-7bb1-4dec-b3b3-2a080d283b49"
        chipset="BXT"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="caf3596a-7bb1-4dec-b3b3-2a080d283b49"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 7 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="49b956e2-d5b9-47e0-9d8a-cee5e8cec527"
        chipset="BXT"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="49b956e2-d5b9-47e0-9d8a-cee5e8cec527"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
-             data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
-             data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
              symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
              units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="f64ef50a-bdba-4b35-8f09-203c13d8ee5a"
        chipset="BXT"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f64ef50a-bdba-4b35-8f09-203c13d8ee5a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="00ad5a41-7eab-4f7a-9103-49d411c67219"
        chipset="BXT"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="00ad5a41-7eab-4f7a-9103-49d411c67219"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA"
                      availability="$SkuRevisionId 0x03 UGTE"
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="46dc44ca-491c-4cc1-a951-e7b3e62bf02b"
        chipset="BXT"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="46dc44ca-491c-4cc1-a951-e7b3e62bf02b"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
              units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             />
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
              description="The percentage of time in which slice0 PS output is available"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="8364e2a8-af63-40af-b0d5-42969a255654"
        chipset="BXT"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="8364e2a8-af63-40af-b0d5-42969a255654"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
              units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 22 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
              description="The percentage of time in which slice0 subslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
              units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
              description="The percentage of time in which slice0 subslice1 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
              units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="175c8092-cb25-4d1e-8dc7-b4fdd39e2d92"
        chipset="BXT"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="175c8092-cb25-4d1e-8dc7-b4fdd39e2d92"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="d260f03f-b34d-4b49-a44e-436819117332"
        chipset="BXT"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d260f03f-b34d-4b49-a44e-436819117332"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="fa6ecf21-2cb8-4d0b-9308-6e4a7b4ca87a"
        chipset="BXT"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="fa6ecf21-2cb8-4d0b-9308-6e4a7b4ca87a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="c9f5fa3a-d14f-400c-a89a-211206b00ee7"
        chipset="BXT"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="c9f5fa3a-d14f-400c-a89a-211206b00ee7"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
              description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
              description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="5ee72f5c-092f-421e-8b70-225f7c3e9612"
        chipset="BXT"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5ee72f5c-092f-421e-8b70-225f7c3e9612"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="d49cd0d8-8c7f-4465-94fc-51e08c9050bc"
        chipset="BXT"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="d49cd0d8-8c7f-4465-94fc-51e08c9050bc"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index b167c11..fce5933 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959549" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="7fa796a4-0c7a-4201-afc6-cff0b2f528a2"
        chipset="CFLGT2"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="7fa796a4-0c7a-4201-afc6-cff0b2f528a2"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
+             equation="B 4 READ 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
     <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
              description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
              description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
              mdapi_group="GTI/Depth Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
+             description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI RCC Throughput"
-             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Color Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="dc8cf7ea-26b4-4478-ac93-dab174f92ac0"
        chipset="CFLGT2"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="dc8cf7ea-26b4-4478-ac93-dab174f92ac0"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="2221e4d5-ed7b-445e-b2cc-3de1b97f4d42"
        chipset="CFLGT2"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2221e4d5-ed7b-445e-b2cc-3de1b97f4d42"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull shader pipeline stage was stalled."
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="82096a90-e2fa-4f38-ac14-562b2496933a"
        chipset="CFLGT2"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="82096a90-e2fa-4f38-ac14-562b2496933a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="9f638880-02e9-4a8d-896a-7670a3bf0d35"
        chipset="CFLGT2"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="9f638880-02e9-4a8d-896a-7670a3bf0d35"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="8d4ad934-7c16-43d5-845a-51067a4c8e2f"
        chipset="CFLGT2"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="8d4ad934-7c16-43d5-845a-51067a4c8e2f"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="4389cf07-1424-4963-b2d2-64fcec75406d"
        chipset="CFLGT2"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="4389cf07-1424-4963-b2d2-64fcec75406d"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
-             data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
-             data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
              units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="OA" address="0x00002794" value="0x0000FBEF" />
         <register type="OA" address="0x00002798" value="0x0007FFFA" />
         <register type="OA" address="0x0000279C" value="0x0000FBDF" />
-    </register_config>
-    <register_config type="FLEX">
-        <register type="FLEX" address="0x0000E458" value="0x00005004" />
-        <register type="FLEX" address="0x0000E558" value="0x00000003" />
-        <register type="FLEX" address="0x0000E658" value="0x00002001" />
-        <register type="FLEX" address="0x0000E758" value="0x00101100" />
-        <register type="FLEX" address="0x0000E45C" value="0x00201200" />
-        <register type="FLEX" address="0x0000E55C" value="0x00301300" />
-        <register type="FLEX" address="0x0000E65C" value="0x00401400" />
-    </register_config>
-  </set>
-
-  <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="1c003bbe-ca7f-49d0-bb0f-2f0096147a00"
-       chipset="CFLGT2"
-       symbol_name="HDCAndSF"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    </register_config>
+    <register_config type="FLEX">
+        <register type="FLEX" address="0x0000E458" value="0x00005004" />
+        <register type="FLEX" address="0x0000E558" value="0x00000003" />
+        <register type="FLEX" address="0x0000E658" value="0x00002001" />
+        <register type="FLEX" address="0x0000E758" value="0x00101100" />
+        <register type="FLEX" address="0x0000E45C" value="0x00201200" />
+        <register type="FLEX" address="0x0000E55C" value="0x00301300" />
+        <register type="FLEX" address="0x0000E65C" value="0x00401400" />
+    </register_config>
+  </set>
+
+  <set name="Metric set HDCAndSF"
+       chipset="CFLGT2"
+       symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="1c003bbe-ca7f-49d0-bb0f-2f0096147a00"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="2a208cb2-0f82-4518-844d-c4c4699659a1"
        chipset="CFLGT2"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2a208cb2-0f82-4518-844d-c4c4699659a1"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="bf38efc7-7a5c-4cc9-87ff-cbb4b954b4ec"
        chipset="CFLGT2"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="bf38efc7-7a5c-4cc9-87ff-cbb4b954b4ec"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="a9cf8100-606e-4cb1-a509-e69f8588c050"
        chipset="CFLGT2"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="a9cf8100-606e-4cb1-a509-e69f8588c050"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="4905b811-fbb0-4f51-aacd-3d46555aad3d"
        chipset="CFLGT2"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="4905b811-fbb0-4f51-aacd-3d46555aad3d"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="00defd09-c283-4d34-a3c5-e2c4f9120adf"
        chipset="CFLGT2"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="00defd09-c283-4d34-a3c5-e2c4f9120adf"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="87b33db5-ad38-4a08-a9e7-5f807dee1a45"
        chipset="CFLGT2"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="87b33db5-ad38-4a08-a9e7-5f807dee1a45"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="fbf2fbc7-c1ce-4b7a-8f32-cf60eb947fa5"
        chipset="CFLGT2"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="fbf2fbc7-c1ce-4b7a-8f32-cf60eb947fa5"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="5f679fb0-909e-4c0e-b4b2-8e801f83e71b"
        chipset="CFLGT2"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="5f679fb0-909e-4c0e-b4b2-8e801f83e71b"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
     </register_config>
   </set>
 
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="0d09ba9a-1d1c-457d-83e2-74ac448014d6"
+  <set name="Media Vme Pipe Gen9"
        chipset="CFLGT2"
        symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="0d09ba9a-1d1c-457d-83e2-74ac448014d6"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="e2f162ae-5732-4af0-8b11-69510f57094a"
        chipset="CFLGT2"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="e2f162ae-5732-4af0-8b11-69510f57094a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
              description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
              description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9.5"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="74fb4902-d3d3-4237-9e90-cbdc68d0a446"
        chipset="CFLGT2"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="74fb4902-d3d3-4237-9e90-cbdc68d0a446"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="5ccbf9fb-6bf2-456b-a749-bdff7b1aff13"
        chipset="CFLGT2"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="5ccbf9fb-6bf2-456b-a749-bdff7b1aff13"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index f8b8789..31a5661 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959550" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="b316bcab-212f-4228-97de-af6b5a1a2ea1"
        chipset="CFLGT3"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b316bcab-212f-4228-97de-af6b5a1a2ea1"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ B 5 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
+             equation="B 4 READ B 5 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
              description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
              mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="607f9cc8-e026-4d5f-bfad-45c77eabc150"
        chipset="CFLGT3"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="607f9cc8-e026-4d5f-bfad-45c77eabc150"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="9875e050-b1bc-45e6-a6ab-665594601df9"
        chipset="CFLGT3"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="9875e050-b1bc-45e6-a6ab-665594601df9"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="VsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
     <counter name="Early Depth Bottleneck"
-             low_watermark="10"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
              description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="49c65f34-e625-4ca4-86b7-88693e624d4c"
        chipset="CFLGT3"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="49c65f34-e625-4ca4-86b7-88693e624d4c"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
-             semantic_type="event"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
              description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
              mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="45c9e8ee-2998-4d83-88e8-9cb7e03287bf"
        chipset="CFLGT3"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="45c9e8ee-2998-4d83-88e8-9cb7e03287bf"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="bac415ce-d7a2-4f8d-9b16-834deba7330e"
        chipset="CFLGT3"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="bac415ce-d7a2-4f8d-9b16-834deba7330e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 7 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="a8cfca44-0e74-4338-9e57-3daad98957dd"
        chipset="CFLGT3"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="a8cfca44-0e74-4338-9e57-3daad98957dd"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
-             data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
-             units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
-             />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
-             data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
-             units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
-             />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
-             data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
-             units="messages"
-             symbol_name="SamplerAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="3d9acca5-8d39-4c34-89ee-f921848d8562"
        chipset="CFLGT3"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3d9acca5-8d39-4c34-89ee-f921848d8562"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
              symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss0)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="93e582ea-de69-46e3-81b5-73386164c047"
        chipset="CFLGT3"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="93e582ea-de69-46e3-81b5-73386164c047"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="6e3680f3-8347-4e26-b930-3900d18d1322"
        chipset="CFLGT3"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="6e3680f3-8347-4e26-b930-3900d18d1322"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="1f513186-11d6-422d-a879-be86f2d08414"
        chipset="CFLGT3"
-       symbol_name="L3_3"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+       symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="1f513186-11d6-422d-a879-be86f2d08414"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="d37d4266-b749-48b2-9652-c24b91784fc6"
        chipset="CFLGT3"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d37d4266-b749-48b2-9652-c24b91784fc6"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
              units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="5951c1d7-feef-4981-b0b5-4e4983023119"
        chipset="CFLGT3"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5951c1d7-feef-4981-b0b5-4e4983023119"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
              units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
              description="The percentage of time in which slice0 subslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
              units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
              description="The percentage of time in which slice0 subslice1 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
              units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="bf279ed6-b3dd-43f3-9810-cb55cb78fc62"
        chipset="CFLGT3"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="bf279ed6-b3dd-43f3-9810-cb55cb78fc62"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="a70c3fa2-e705-4f5a-8883-2ebd0feef1d2"
        chipset="CFLGT3"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="a70c3fa2-e705-4f5a-8883-2ebd0feef1d2"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
+             max_equation="100"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="e61ae394-9d9e-4204-a735-1dad7e44d953"
        chipset="CFLGT3"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="e61ae394-9d9e-4204-a735-1dad7e44d953"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009888" value="0x02B14000" />
         <register type="NOA" address="0x00009888" value="0x02B20033" />
         <register type="NOA" address="0x00009888" value="0x00B20000" />
-        <register type="NOA" address="0x00009888" value="0x02B31000" />
-        <register type="NOA" address="0x00009888" value="0x00D08000" />
-        <register type="NOA" address="0x00009888" value="0x00D18000" />
-        <register type="NOA" address="0x00009888" value="0x00D21980" />
-        <register type="NOA" address="0x00009888" value="0x00D34000" />
-        <register type="NOA" address="0x00009888" value="0x1190FC00" />
-        <register type="NOA" address="0x00009888" value="0x37900000" />
-        <register type="NOA" address="0x00009888" value="0x51900000" />
-        <register type="NOA" address="0x00009888" value="0x41900C00" />
-        <register type="NOA" address="0x00009888" value="0x43900002" />
-        <register type="NOA" address="0x00009888" value="0x53900420" />
-        <register type="NOA" address="0x00009888" value="0x459000A1" />
-        <register type="NOA" address="0x00009888" value="0x33900000" />
-    </register_config>
-  </set>
-
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="94272ad9-45ee-4e34-b7a7-51546cd6405c"
-       chipset="CFLGT3"
-       symbol_name="VMEPipe"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
+        <register type="NOA" address="0x00009888" value="0x02B31000" />
+        <register type="NOA" address="0x00009888" value="0x00D08000" />
+        <register type="NOA" address="0x00009888" value="0x00D18000" />
+        <register type="NOA" address="0x00009888" value="0x00D21980" />
+        <register type="NOA" address="0x00009888" value="0x00D34000" />
+        <register type="NOA" address="0x00009888" value="0x1190FC00" />
+        <register type="NOA" address="0x00009888" value="0x37900000" />
+        <register type="NOA" address="0x00009888" value="0x51900000" />
+        <register type="NOA" address="0x00009888" value="0x41900C00" />
+        <register type="NOA" address="0x00009888" value="0x43900002" />
+        <register type="NOA" address="0x00009888" value="0x53900420" />
+        <register type="NOA" address="0x00009888" value="0x459000A1" />
+        <register type="NOA" address="0x00009888" value="0x33900000" />
+    </register_config>
+  </set>
+
+  <set name="Media Vme Pipe Gen9"
+       chipset="CFLGT3"
+       symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="94272ad9-45ee-4e34-b7a7-51546cd6405c"
+       >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="22b7e0c2-cade-425f-b099-34479768c72a"
        chipset="CFLGT3"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="22b7e0c2-cade-425f-b099-34479768c72a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AnyRingBusy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
+             description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="577e8e2c-3fa0-4875-8743-3538d585e3b0"
        chipset="CFLGT3"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="577e8e2c-3fa0-4875-8743-3538d585e3b0"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="c11af8d1-858b-4f8b-98fb-8d683ba8bda0"
        chipset="CFLGT3"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="c11af8d1-858b-4f8b-98fb-8d683ba8bda0"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index 85c50bb..a1c6c3d 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1508420698" merge_md5="">
   <set name="Render Metrics Basic Gen8LP"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="9d8a3af5-c02c-4a4a-b947-f1672469e0fb"
        chipset="CHV"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="9d8a3af5-c02c-4a4a-b947-f1672469e0fb"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="sampler1_bottleneck"
-             units="percent"
-             symbol_name="Sampler1Bottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Sampler 0 Busy"
-             description="The percentage of time in which Sampler 0 has been processing EU requests."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Busy"
-             description="The percentage of time in which Sampler 1 has been processing EU requests."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samplers Busy"
-             description="The percentage of time in which samplers have been processing EU requests."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
-             units="bytes"
-             symbol_name="GtiDepthThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
+    <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
+             description="The percentage of time in which Sampler 0 has been processing EU requests."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
+             description="The percentage of time in which Sampler 1 has been processing EU requests."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
+             description="The percentage of time in which samplers have been processing EU requests."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Sampler 0 Bottleneck"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
+             description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
-             units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI RCC Throughput"
-             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
-             units="bytes"
-             symbol_name="GtiRccThroughput"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Color Cache"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Draw"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
+             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Compute Metrics Basic Gen8LP"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="f522a89c-ecd1-4522-8331-3383c54af5f5"
        chipset="CHV"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="f522a89c-ecd1-4522-8331-3383c54af5f5"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 4 READ B 5 READ UADD 64 UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 2 READ B 3 READ UADD 64 UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL 2 UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 0 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Ring Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and Uncore ring."
+    <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
+             description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 128 UMUL"
-             equation="C 2 READ 128 UMUL"
-             underscore_name="gti_ring_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="GtiRingThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 0 READ B 1 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
-             description="The total number of untyped memory bytes written via Data Port."
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 2 READ B 3 READ UADD 64 UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Typed Bytes Read"
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
-             description="The total number of typed memory bytes read via Data Port."
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 0 READ B 1 READ UADD 64 UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ B 5 READ UADD 64 UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Read-Only Stall"
-             description="The percentage of time in which GTI Read-Only port has been stalled."
-             data_type="float"
-             max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gti_ro_stall"
-             units="percent"
-             symbol_name="GtiRoStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Read-Write Stall"
-             description="The percentage of time in which GTI Read-Write port has been stalled."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gti_rw_stall"
-             units="percent"
-             symbol_name="GtiRwStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 0 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
              description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 1 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="C 1 READ 64 UMUL"
              mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Ring Throughput"
+             symbol_name="GtiRingThroughput"
+             underscore_name="gti_ring_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and Uncore ring."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 128 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 2 READ 128 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+    <counter name="GTI Read-Only Stall"
+             symbol_name="GtiRoStall"
+             underscore_name="gti_ro_stall"
+             description="The percentage of time in which GTI Read-Only port has been stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="GTI Read-Write Stall"
+             symbol_name="GtiRwStall"
+             underscore_name="gti_rw_stall"
+             description="The percentage of time in which GTI Read-Write port has been stalled."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
              units="percent"
-             symbol_name="EuSendActive"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="a9ccc03d-a943-4e6b-9cd6-13e063075927"
        chipset="CHV"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="a9ccc03d-a943-4e6b-9cd6-13e063075927"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="HiDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
-             units="percent"
-             symbol_name="SfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Bottleneck"
-             low_watermark="3"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
              description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="3"
              high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
-    <counter name="SO Bottleneck"
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="SO Bottleneck"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
+             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
     <counter name="Early Depth Bottleneck"
-             low_watermark="10"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
              description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="2cf0c064-68df-4fac-9b3f-57f51ca8a069"
        chipset="CHV"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2cf0c064-68df-4fac-9b3f-57f51ca8a069"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
              units="percent"
-             symbol_name="PolyDataReady"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ C 0 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader12_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader12AccessStalledOnL3"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HDC stalled by L3 (s0.ss0)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 5 READ B 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader10_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader10AccessStalledOnL3"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 21 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ B 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader11_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader11AccessStalledOnL3"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss0)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00009840" value="0x000000A0" />
-        <register type="NOA" address="0x00009888" value="0x105C0232" />
-        <register type="NOA" address="0x00009888" value="0x10580232" />
-        <register type="NOA" address="0x00009888" value="0x10380232" />
-        <register type="NOA" address="0x00009888" value="0x10DC0232" />
+    <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader12AccessStalledOnL3"
+             underscore_name="non_sampler_shader12_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ C 0 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader11AccessStalledOnL3"
+             underscore_name="non_sampler_shader11_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ B 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader10AccessStalledOnL3"
+             underscore_name="non_sampler_shader10_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ B 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00009840" value="0x000000A0" />
+        <register type="NOA" address="0x00009888" value="0x105C0232" />
+        <register type="NOA" address="0x00009888" value="0x10580232" />
+        <register type="NOA" address="0x00009888" value="0x10380232" />
+        <register type="NOA" address="0x00009888" value="0x10DC0232" />
         <register type="NOA" address="0x00009888" value="0x10D80232" />
         <register type="NOA" address="0x00009888" value="0x10B80232" />
         <register type="NOA" address="0x00009888" value="0x118E4400" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="78a87ff9-543a-49ce-95ea-26d86071ea93"
        chipset="CHV"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="78a87ff9-543a-49ce-95ea-26d86071ea93"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank1 Stalled"
-             description="The percentage of time in which slice1 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank1_stalled"
-             units="percent"
-             symbol_name="L31Bank1Stalled"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Slice1 L3 Bank0 Stalled"
-             description="The percentage of time in which slice1 L3 bank0 is stalled"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank0_stalled"
              units="percent"
-             symbol_name="L31Bank0Stalled"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice1 L3 Bank1 Active"
-             description="The percentage of time in which slice1 L3 bank1 is active"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank1_active"
              units="percent"
-             symbol_name="L31Bank1Active"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank0 Active"
-             description="The percentage of time in which slice1 L3 bank0 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank0_active"
-             units="percent"
-             symbol_name="L31Bank0Active"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00009840" value="0x000000A0" />
-        <register type="NOA" address="0x00009888" value="0x10BF03DA" />
-        <register type="NOA" address="0x00009888" value="0x14BF0001" />
-        <register type="NOA" address="0x00009888" value="0x12980340" />
-        <register type="NOA" address="0x00009888" value="0x12990340" />
-        <register type="NOA" address="0x00009888" value="0x0CBF1187" />
-        <register type="NOA" address="0x00009888" value="0x0EBF1205" />
-        <register type="NOA" address="0x00009888" value="0x00BF0500" />
-        <register type="NOA" address="0x00009888" value="0x02BF042B" />
-        <register type="NOA" address="0x00009888" value="0x04BF002C" />
-        <register type="NOA" address="0x00009888" value="0x0CDAC000" />
-        <register type="NOA" address="0x00009888" value="0x0EDAC000" />
-        <register type="NOA" address="0x00009888" value="0x00DA8000" />
-        <register type="NOA" address="0x00009888" value="0x02DAC000" />
-        <register type="NOA" address="0x00009888" value="0x04DA4000" />
-        <register type="NOA" address="0x00009888" value="0x04983400" />
-        <register type="NOA" address="0x00009888" value="0x10980000" />
-        <register type="NOA" address="0x00009888" value="0x06990034" />
-        <register type="NOA" address="0x00009888" value="0x10990000" />
-        <register type="NOA" address="0x00009888" value="0x0C9DC000" />
-        <register type="NOA" address="0x00009888" value="0x0E9DC000" />
-        <register type="NOA" address="0x00009888" value="0x009D8000" />
-        <register type="NOA" address="0x00009888" value="0x029DC000" />
-        <register type="NOA" address="0x00009888" value="0x049D4000" />
-        <register type="NOA" address="0x00009888" value="0x109F02A8" />
-        <register type="NOA" address="0x00009888" value="0x0C9FA000" />
-        <register type="NOA" address="0x00009888" value="0x0E9F00BA" />
-        <register type="NOA" address="0x00009888" value="0x0CB88000" />
-        <register type="NOA" address="0x00009888" value="0x0CB95000" />
-        <register type="NOA" address="0x00009888" value="0x0EB95000" />
-        <register type="NOA" address="0x00009888" value="0x00B94000" />
-        <register type="NOA" address="0x00009888" value="0x02B95000" />
-        <register type="NOA" address="0x00009888" value="0x04B91000" />
-        <register type="NOA" address="0x00009888" value="0x06B92000" />
-        <register type="NOA" address="0x00009888" value="0x0CBA4000" />
+    <counter name="Slice1 L3 Bank0 Stalled"
+             symbol_name="L31Bank0Stalled"
+             underscore_name="l31_bank0_stalled"
+             description="The percentage of time in which slice1 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank1 Stalled"
+             symbol_name="L31Bank1Stalled"
+             underscore_name="l31_bank1_stalled"
+             description="The percentage of time in which slice1 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank1 Active"
+             symbol_name="L31Bank1Active"
+             underscore_name="l31_bank1_active"
+             description="The percentage of time in which slice1 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank0 Active"
+             symbol_name="L31Bank0Active"
+             underscore_name="l31_bank0_active"
+             description="The percentage of time in which slice1 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00009840" value="0x000000A0" />
+        <register type="NOA" address="0x00009888" value="0x10BF03DA" />
+        <register type="NOA" address="0x00009888" value="0x14BF0001" />
+        <register type="NOA" address="0x00009888" value="0x12980340" />
+        <register type="NOA" address="0x00009888" value="0x12990340" />
+        <register type="NOA" address="0x00009888" value="0x0CBF1187" />
+        <register type="NOA" address="0x00009888" value="0x0EBF1205" />
+        <register type="NOA" address="0x00009888" value="0x00BF0500" />
+        <register type="NOA" address="0x00009888" value="0x02BF042B" />
+        <register type="NOA" address="0x00009888" value="0x04BF002C" />
+        <register type="NOA" address="0x00009888" value="0x0CDAC000" />
+        <register type="NOA" address="0x00009888" value="0x0EDAC000" />
+        <register type="NOA" address="0x00009888" value="0x00DA8000" />
+        <register type="NOA" address="0x00009888" value="0x02DAC000" />
+        <register type="NOA" address="0x00009888" value="0x04DA4000" />
+        <register type="NOA" address="0x00009888" value="0x04983400" />
+        <register type="NOA" address="0x00009888" value="0x10980000" />
+        <register type="NOA" address="0x00009888" value="0x06990034" />
+        <register type="NOA" address="0x00009888" value="0x10990000" />
+        <register type="NOA" address="0x00009888" value="0x0C9DC000" />
+        <register type="NOA" address="0x00009888" value="0x0E9DC000" />
+        <register type="NOA" address="0x00009888" value="0x009D8000" />
+        <register type="NOA" address="0x00009888" value="0x029DC000" />
+        <register type="NOA" address="0x00009888" value="0x049D4000" />
+        <register type="NOA" address="0x00009888" value="0x109F02A8" />
+        <register type="NOA" address="0x00009888" value="0x0C9FA000" />
+        <register type="NOA" address="0x00009888" value="0x0E9F00BA" />
+        <register type="NOA" address="0x00009888" value="0x0CB88000" />
+        <register type="NOA" address="0x00009888" value="0x0CB95000" />
+        <register type="NOA" address="0x00009888" value="0x0EB95000" />
+        <register type="NOA" address="0x00009888" value="0x00B94000" />
+        <register type="NOA" address="0x00009888" value="0x02B95000" />
+        <register type="NOA" address="0x00009888" value="0x04B91000" />
+        <register type="NOA" address="0x00009888" value="0x06B92000" />
+        <register type="NOA" address="0x00009888" value="0x0CBA4000" />
         <register type="NOA" address="0x00009888" value="0x0F88000F" />
         <register type="NOA" address="0x00009888" value="0x03888000" />
         <register type="NOA" address="0x00009888" value="0x05888000" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="9f2cece5-7bfe-4320-ad66-8c7cc526bec5"
        chipset="CHV"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="9f2cece5-7bfe-4320-ad66-8c7cc526bec5"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
              units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
              description="The total number of pixels dropped on early depth test."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00009840" value="0x000000A0" />
-        <register type="NOA" address="0x00009888" value="0x103F03DA" />
-        <register type="NOA" address="0x00009888" value="0x143F0001" />
-        <register type="NOA" address="0x00009888" value="0x12180340" />
-        <register type="NOA" address="0x00009888" value="0x12190340" />
-        <register type="NOA" address="0x00009888" value="0x0C3F1187" />
-        <register type="NOA" address="0x00009888" value="0x0E3F1205" />
-        <register type="NOA" address="0x00009888" value="0x003F0500" />
-        <register type="NOA" address="0x00009888" value="0x023F042B" />
-        <register type="NOA" address="0x00009888" value="0x043F002C" />
-        <register type="NOA" address="0x00009888" value="0x0C5AC000" />
-        <register type="NOA" address="0x00009888" value="0x0E5AC000" />
-        <register type="NOA" address="0x00009888" value="0x005A8000" />
-        <register type="NOA" address="0x00009888" value="0x025AC000" />
-        <register type="NOA" address="0x00009888" value="0x045A4000" />
-        <register type="NOA" address="0x00009888" value="0x04183400" />
-        <register type="NOA" address="0x00009888" value="0x10180000" />
-        <register type="NOA" address="0x00009888" value="0x06190034" />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00009840" value="0x000000A0" />
+        <register type="NOA" address="0x00009888" value="0x103F03DA" />
+        <register type="NOA" address="0x00009888" value="0x143F0001" />
+        <register type="NOA" address="0x00009888" value="0x12180340" />
+        <register type="NOA" address="0x00009888" value="0x12190340" />
+        <register type="NOA" address="0x00009888" value="0x0C3F1187" />
+        <register type="NOA" address="0x00009888" value="0x0E3F1205" />
+        <register type="NOA" address="0x00009888" value="0x003F0500" />
+        <register type="NOA" address="0x00009888" value="0x023F042B" />
+        <register type="NOA" address="0x00009888" value="0x043F002C" />
+        <register type="NOA" address="0x00009888" value="0x0C5AC000" />
+        <register type="NOA" address="0x00009888" value="0x0E5AC000" />
+        <register type="NOA" address="0x00009888" value="0x005A8000" />
+        <register type="NOA" address="0x00009888" value="0x025AC000" />
+        <register type="NOA" address="0x00009888" value="0x045A4000" />
+        <register type="NOA" address="0x00009888" value="0x04183400" />
+        <register type="NOA" address="0x00009888" value="0x10180000" />
+        <register type="NOA" address="0x00009888" value="0x06190034" />
         <register type="NOA" address="0x00009888" value="0x10190000" />
         <register type="NOA" address="0x00009888" value="0x0C1DC000" />
         <register type="NOA" address="0x00009888" value="0x0E1DC000" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="d890ef38-d309-47e4-b8b5-aa779bb19ab0"
        chipset="CHV"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d890ef38-d309-47e4-b8b5-aa779bb19ab0"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
              units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank3 Active"
-             description="The percentage of time in which slice1 L3 bank3 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank3_active"
              units="percent"
-             symbol_name="L31Bank3Active"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank3 Stalled"
-             description="The percentage of time in which slice1 L3 bank3 is stalled"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank3_stalled"
              units="percent"
-             symbol_name="L31Bank3Stalled"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00009840" value="0x000000A0" />
-        <register type="NOA" address="0x00009888" value="0x121B0340" />
-        <register type="NOA" address="0x00009888" value="0x103F0274" />
-        <register type="NOA" address="0x00009888" value="0x123F0000" />
-        <register type="NOA" address="0x00009888" value="0x129B0340" />
-        <register type="NOA" address="0x00009888" value="0x10BF0274" />
-        <register type="NOA" address="0x00009888" value="0x12BF0000" />
-        <register type="NOA" address="0x00009888" value="0x041B3400" />
-        <register type="NOA" address="0x00009888" value="0x101B0000" />
-        <register type="NOA" address="0x00009888" value="0x045C8000" />
-        <register type="NOA" address="0x00009888" value="0x0A3D4000" />
-        <register type="NOA" address="0x00009888" value="0x003F0080" />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank3 Stalled"
+             symbol_name="L31Bank3Stalled"
+             underscore_name="l31_bank3_stalled"
+             description="The percentage of time in which slice1 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank3 Active"
+             symbol_name="L31Bank3Active"
+             underscore_name="l31_bank3_active"
+             description="The percentage of time in which slice1 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00009840" value="0x000000A0" />
+        <register type="NOA" address="0x00009888" value="0x121B0340" />
+        <register type="NOA" address="0x00009888" value="0x103F0274" />
+        <register type="NOA" address="0x00009888" value="0x123F0000" />
+        <register type="NOA" address="0x00009888" value="0x129B0340" />
+        <register type="NOA" address="0x00009888" value="0x10BF0274" />
+        <register type="NOA" address="0x00009888" value="0x12BF0000" />
+        <register type="NOA" address="0x00009888" value="0x041B3400" />
+        <register type="NOA" address="0x00009888" value="0x101B0000" />
+        <register type="NOA" address="0x00009888" value="0x045C8000" />
+        <register type="NOA" address="0x00009888" value="0x0A3D4000" />
+        <register type="NOA" address="0x00009888" value="0x003F0080" />
         <register type="NOA" address="0x00009888" value="0x023F0793" />
         <register type="NOA" address="0x00009888" value="0x043F0014" />
         <register type="NOA" address="0x00009888" value="0x04588000" />
   </set>
 
   <set name="Metric set L3_4"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_4"
-       hw_config_guid="5fdff4a6-9dc8-45e1-bfda-ef54869fbdd4"
        chipset="CHV"
        symbol_name="L3_4"
+       underscore_name="l3_4"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5fdff4a6-9dc8-45e1-bfda-ef54869fbdd4"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 L3 Bank2 Active"
-             description="The percentage of time in which slice1 L3 bank2 is active"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank2_active"
              units="percent"
-             symbol_name="L31Bank2Active"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice1 L3 Bank2 Stalled"
-             description="The percentage of time in which slice1 L3 bank2 is stalled"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l31_bank2_stalled"
              units="percent"
-             symbol_name="L31Bank2Stalled"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00009840" value="0x000000A0" />
-        <register type="NOA" address="0x00009888" value="0x121A0340" />
-        <register type="NOA" address="0x00009888" value="0x103F0017" />
-        <register type="NOA" address="0x00009888" value="0x123F0020" />
-        <register type="NOA" address="0x00009888" value="0x129A0340" />
-        <register type="NOA" address="0x00009888" value="0x10BF0017" />
-        <register type="NOA" address="0x00009888" value="0x12BF0020" />
-        <register type="NOA" address="0x00009888" value="0x041A3400" />
-        <register type="NOA" address="0x00009888" value="0x101A0000" />
-        <register type="NOA" address="0x00009888" value="0x043B8000" />
-        <register type="NOA" address="0x00009888" value="0x0A3E0010" />
-        <register type="NOA" address="0x00009888" value="0x003F0200" />
-        <register type="NOA" address="0x00009888" value="0x023F0113" />
-        <register type="NOA" address="0x00009888" value="0x043F0014" />
-        <register type="NOA" address="0x00009888" value="0x02592000" />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank2 Stalled"
+             symbol_name="L31Bank2Stalled"
+             underscore_name="l31_bank2_stalled"
+             description="The percentage of time in which slice1 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 L3 Bank2 Active"
+             symbol_name="L31Bank2Active"
+             underscore_name="l31_bank2_active"
+             description="The percentage of time in which slice1 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00009840" value="0x000000A0" />
+        <register type="NOA" address="0x00009888" value="0x121A0340" />
+        <register type="NOA" address="0x00009888" value="0x103F0017" />
+        <register type="NOA" address="0x00009888" value="0x123F0020" />
+        <register type="NOA" address="0x00009888" value="0x129A0340" />
+        <register type="NOA" address="0x00009888" value="0x10BF0017" />
+        <register type="NOA" address="0x00009888" value="0x12BF0020" />
+        <register type="NOA" address="0x00009888" value="0x041A3400" />
+        <register type="NOA" address="0x00009888" value="0x101A0000" />
+        <register type="NOA" address="0x00009888" value="0x043B8000" />
+        <register type="NOA" address="0x00009888" value="0x0A3E0010" />
+        <register type="NOA" address="0x00009888" value="0x003F0200" />
+        <register type="NOA" address="0x00009888" value="0x023F0113" />
+        <register type="NOA" address="0x00009888" value="0x043F0014" />
+        <register type="NOA" address="0x00009888" value="0x02592000" />
         <register type="NOA" address="0x00009888" value="0x005A8000" />
         <register type="NOA" address="0x00009888" value="0x025AC000" />
         <register type="NOA" address="0x00009888" value="0x045A4000" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="2c0e45e1-7e2c-4a14-ae00-0b7ec868b8aa"
        chipset="CHV"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2c0e45e1-7e2c-4a14-ae00-0b7ec868b8aa"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Rasterizer Input Available"
-             description="The percentage of time in which slice1 rasterizer input is available"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer1_input_available"
              units="percent"
-             symbol_name="Rasterizer1InputAvailable"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied))"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Pixel Values Ready"
-             description="The percentage of time in which slice1 pixel values are ready"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values1_ready"
              units="percent"
-             symbol_name="PixelValues1Ready"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 PS Output Available"
-             description="The percentage of time in which slice1 PS output is available"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output1_available"
              units="percent"
-             symbol_name="PSOutput1Available"
-             availability="$SliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice1 Rasterizer Output Ready"
-             description="The percentage of time in which slice1 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer1_output_ready"
-             units="percent"
-             symbol_name="Rasterizer1OutputReady"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data1_ready"
-             units="percent"
-             symbol_name="PixelData1Ready"
-             availability="$SliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData1Ready"
+             underscore_name="pixel_data1_ready"
+             description="The percentage of time in which slice1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 Rasterizer Input Available"
+             symbol_name="Rasterizer1InputAvailable"
+             underscore_name="rasterizer1_input_available"
+             description="The percentage of time in which slice1 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 Rasterizer Output Ready"
+             symbol_name="Rasterizer1OutputReady"
+             underscore_name="rasterizer1_output_ready"
+             description="The percentage of time in which slice1 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+    <counter name="Slice1 Pixel Values Ready"
+             symbol_name="PixelValues1Ready"
+             underscore_name="pixel_values1_ready"
+             description="The percentage of time in which slice1 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice1 PS Output Available"
+             symbol_name="PSOutput1Available"
+             underscore_name="ps_output1_available"
+             description="The percentage of time in which slice1 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x2 AND"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set Sampler_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_1"
-       hw_config_guid="71148d78-baf5-474f-878a-e23158d0265d"
        chipset="CHV"
        symbol_name="Sampler_1"
+       underscore_name="sampler_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="71148d78-baf5-474f-878a-e23158d0265d"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Slice1 Subslice0 Input Available"
-             description="The percentage of time in which slice1 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler10_input_available"
-             units="percent"
-             symbol_name="Sampler10InputAvailable"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice1 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice1 subslice2 sampler output is ready"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler12_output_ready"
              units="percent"
-             symbol_name="Sampler12OutputReady"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Subslice1 Input Available"
-             description="The percentage of time in which slice1 subslice1 sampler input is available"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler11_input_available"
              units="percent"
-             symbol_name="Sampler11InputAvailable"
-             availability="$SubsliceMask 0x10 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 21 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice1 Subslice2 Input Available"
-             description="The percentage of time in which slice1 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler12_input_available"
-             units="percent"
-             symbol_name="Sampler12InputAvailable"
-             availability="$SubsliceMask 0x20 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice1 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice1 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler10_output_ready"
-             units="percent"
-             symbol_name="Sampler10OutputReady"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Slice1 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice1 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler11_output_ready"
-             units="percent"
-             symbol_name="Sampler11OutputReady"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice1 Subslice1 Input Available"
+             symbol_name="Sampler11InputAvailable"
+             underscore_name="sampler11_input_available"
+             description="The percentage of time in which slice1 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice2 Input Available"
+             symbol_name="Sampler12InputAvailable"
+             underscore_name="sampler12_input_available"
+             description="The percentage of time in which slice1 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice0 Input Available"
+             symbol_name="Sampler10InputAvailable"
+             underscore_name="sampler10_input_available"
+             description="The percentage of time in which slice1 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler12OutputReady"
+             underscore_name="sampler12_output_ready"
+             description="The percentage of time in which slice1 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler10OutputReady"
+             underscore_name="sampler10_output_ready"
+             description="The percentage of time in which slice1 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice1 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler11OutputReady"
+             underscore_name="sampler11_output_ready"
+             description="The percentage of time in which slice1 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set Sampler_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_2"
-       hw_config_guid="b996a2b7-c59c-492d-877a-8cd54fd6df84"
        chipset="CHV"
        symbol_name="Sampler_2"
+       underscore_name="sampler_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b996a2b7-c59c-492d-877a-8cd54fd6df84"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
              units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
              description="The percentage of time in which slice0 subslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
              units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
              description="The percentage of time in which slice0 subslice1 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
              units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="eb2fecba-b431-42e7-8261-fe9429a6e67a"
        chipset="CHV"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="eb2fecba-b431-42e7-8261-fe9429a6e67a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread11_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread11ReadyForDispatch"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread12_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread12ReadyForDispatch"
-             availability="$SubsliceMask 0x20 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread10_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread10ReadyForDispatch"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice1"
+             symbol_name="NonPSThread11ReadyForDispatch"
+             underscore_name="non_ps_thread11_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice0"
+             symbol_name="PSThread10ReadyForDispatch"
+             underscore_name="ps_thread10_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice0"
+             symbol_name="NonPSThread10ReadyForDispatch"
+             underscore_name="non_ps_thread10_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="PS Thread Ready For Dispatch on Slice1 Subslice2"
+             symbol_name="PSThread12ReadyForDispatch"
+             underscore_name="ps_thread12_ready_for_dispatch"
              description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread12_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread12ReadyForDispatch"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice2"
+             symbol_name="NonPSThread12ReadyForDispatch"
+             underscore_name="non_ps_thread12_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice0 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice1 Subslice1"
+             symbol_name="PSThread11ReadyForDispatch"
+             underscore_name="ps_thread11_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread10_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread10ReadyForDispatch"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice1 Subslice1"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice1 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread11_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread11ReadyForDispatch"
-             availability="$SubsliceMask 0x10 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="60749470-a648-4a4b-9f10-dbfe1e36e44d"
        chipset="CHV"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="60749470-a648-4a4b-9f10-dbfe1e36e44d"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 1"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header11_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader11ReadyPort1"
-             availability="$SubsliceMask 0x10 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header11_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader11ReadyPort0"
-             availability="$SubsliceMask 0x10 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 0"
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header12_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader12ReadyPort0"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice0 Port 1"
-             description="The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 1"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header10_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader10ReadyPort1"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice1 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice1 Subslice2 Port 0"
+             symbol_name="ThreadHeader12ReadyPort0"
+             underscore_name="thread_header12_ready_port0"
+             description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header12_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader12ReadyPort1"
-             availability="$SubsliceMask 0x20 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+    <counter name="Thread Header Ready on Slice1 Subslice2 Port 1"
+             symbol_name="ThreadHeader12ReadyPort1"
+             underscore_name="thread_header12_ready_port1"
+             description="The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL 2 UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x20 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+    <counter name="Thread Header Ready on Slice1 Subslice1 Port 1"
+             symbol_name="ThreadHeader11ReadyPort1"
+             underscore_name="thread_header11_ready_port1"
+             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice1 Subslice0 Port 0"
+             symbol_name="ThreadHeader10ReadyPort0"
+             underscore_name="thread_header10_ready_port0"
              description="The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header10_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader10ReadyPort0"
-             availability="$SubsliceMask 0x8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="Thread Header Ready on Slice1 Subslice0 Port 1"
+             symbol_name="ThreadHeader10ReadyPort1"
+             underscore_name="thread_header10_ready_port1"
+             description="The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice1 Subslice1 Port 0"
+             symbol_name="ThreadHeader11ReadyPort0"
+             underscore_name="thread_header11_ready_port0"
+             description="The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x10 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen8LP"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="4a534b07-cba3-414d-8d60-874830e883aa"
        chipset="CHV"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="4a534b07-cba3-414d-8d60-874830e883aa"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
index 20fcf71..ff06a85 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1572425295" merge_md5="">
   <set name="Render Metrics Basic Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="807f52f4-e457-4da4-b2b6-9a7b6dc2b1ed"
        chipset="EHL"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="807f52f4-e457-4da4-b2b6-9a7b6dc2b1ed"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Sampler00 Busy"
-             description="The percentage of time in which Slice0 Sampler0 has been processing EU requests."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_busy"
              units="percent"
-             symbol_name="Sampler00Busy"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samplers Busy"
-             description="The percentage of time in which samplers have been processing EU requests."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="$Sampler00Busy"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
-             availability="$SubsliceMask 9 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Sampler00 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Slice0 Sampler0 has been slowing down the pipe when processing EU requests."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler00_bottleneck"
              units="percent"
-             symbol_name="Sampler00Bottleneck"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 5 READ B 4 READ UADD UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Sampler00 Busy"
+             symbol_name="Sampler00Busy"
+             underscore_name="sampler00_busy"
+             description="The percentage of time in which Slice0 Sampler0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
+             description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="$Sampler00Busy"
+             availability="$SubsliceMask 9 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Sampler00 Bottleneck"
+             symbol_name="Sampler00Bottleneck"
+             underscore_name="sampler00_bottleneck"
+             description="The percentage of time in which Slice0 Sampler0 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler00Bottleneck"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             availability="$SubsliceMask 9 AND"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 0 READ B 2 READ UADD UMUL"
-             underscore_name="gti_write_throughput"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ C 5 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 5 READ B 4 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 0 READ B 2 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler00Bottleneck"
+             availability="$SubsliceMask 9 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Compute Metrics Basic Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="5706829a-b9f3-4d0f-aa88-420f452540bf"
        chipset="EHL"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5706829a-b9f3-4d0f-aa88-420f452540bf"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 3 READ C 2 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ C 4 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU AVG IPC Rate"
-             description="The average rate of IPC calculated for 2 FPU pipelines."
-             data_type="float"
-             max_equation="2"
-             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
-             units="number"
-             symbol_name="EuAvgIpcRate"
-             semantic_type="ratio"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
+             description="The average rate of IPC calculated for 2 FPU pipelines."
+             data_type="float"
+             max_equation="2"
+             units="number"
+             semantic_type="ratio"
+             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Typed Atomics Accesses"
-             description="The total number of typed atomic accesses via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 4 READ B 5 READ UADD 2 UDIV $EuSubslicesTotalCount UMUL"
-             underscore_name="typed_atomics"
-             units="events"
-             symbol_name="TypedAtomics"
-             semantic_type="throughput"
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="8 A 13 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 7 READ B 6 READ UADD UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 0 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="untyped_bytes_written"
-             units="bytes"
-             symbol_name="UntypedBytesWritten"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Typed Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 7 READ C 6 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="typed_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="8 A 13 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
+             description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 0 READ B 2 READ UADD UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 7 READ C 6 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 5 READ C 4 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 3 READ C 2 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 1 READ C 0 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Typed Atomics Accesses"
+             symbol_name="TypedAtomics"
+             underscore_name="typed_atomics"
+             description="The total number of typed atomic accesses via Data Port."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="events"
+             semantic_type="throughput"
+             equation="B 4 READ B 5 READ UADD 2 UDIV $EuSubslicesTotalCount UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 7 READ B 6 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 0 READ B 2 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="ComputeExtended Gen11"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="2ab52087-3700-4db4-8dda-f73b77edfd93"
        chipset="EHL"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="2ab52087-3700-4db4-8dda-f73b77edfd93"
        >
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="B 7 READ B 1 READ FADD C 2 READ FADD C 3 READ FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="ratio"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Eu Typed Atomics 00"
-             description="Slice0 Dualsubslice 0 Eu Typed Atomics"
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_atomics00"
-             units="messages"
-             symbol_name="EuTypedAtomics00"
-             availability="$SubsliceMask 1 AND"
+             units="cycles"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 00"
-             description="Slice 0 Dualsubslice 0 typed atomics."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="typed_atomics00"
-             units="messages"
-             symbol_name="TypedAtomics00"
-             availability="$SubsliceMask 1 AND"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             equation="$EuTypedAtomics00 $TypedAtomics00 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="ratio"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Eu 64 Untyped Reads 00"
-             description="Slice0 Dualsubslice 0 Eu 64 Untyped Reads"
-             data_type="float"
-             equation="B 5 READ C 1 READ FADD B 6 READ FADD"
-             underscore_name="eu_a64_untyped_reads00"
-             units="messages"
-             symbol_name="EuA64UntypedReads00"
-             availability="$SubsliceMask 1 AND"
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Eu A32 Untyped Writes 00"
-             description="Slice0 Dualsubslice 0 Eu A32 Untyped Writes"
+    <counter name="Typed Atomics 00"
+             symbol_name="TypedAtomics00"
+             underscore_name="typed_atomics00"
+             description="Slice 0 Dualsubslice 0 typed atomics."
              data_type="uint64"
-             equation="B 7 READ B 1 READ UADD"
-             underscore_name="eu_a32_untyped_writes00"
              units="messages"
-             symbol_name="EuA32UntypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 7 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="Typed Reads 00"
+             symbol_name="TypedReads00"
+             underscore_name="typed_reads00"
+             description="Slice 0 Dualsubslice 0 typed reads."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="messages"
              semantic_type="event"
+             equation="C 6 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Typed Writes 00"
+             symbol_name="TypedWrites00"
+             underscore_name="typed_writes00"
+             description="Slice 0 Dualsubslice 0 typed writes."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Untyped Reads 00"
+             symbol_name="UntypedReads00"
+             underscore_name="untyped_reads00"
+             description="Slice 0 Dualsubslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="C 4 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Untyped Writes 00"
+             symbol_name="UntypedWrites00"
+             underscore_name="untyped_writes00"
+             description="Slice 0 Dualsubslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Eu Typed Reads 00"
+             symbol_name="EuTypedReads00"
+             underscore_name="eu_typed_reads00"
              description="Slice0 Dualsubslice 0 Eu Typed Reads"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_reads00"
              units="messages"
-             symbol_name="EuTypedReads00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="Eu Typed Writes 00"
+             symbol_name="EuTypedWrites00"
+             underscore_name="eu_typed_writes00"
              description="Slice0 Dualsubslice 0 Eu Typed Writes"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_typed_writes00"
              units="messages"
-             symbol_name="EuTypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Typed Writes 00"
-             description="Slice 0 Dualsubslice 0 typed writes."
+    <counter name="Eu Typed Atomics 00"
+             symbol_name="EuTypedAtomics00"
+             underscore_name="eu_typed_atomics00"
+             description="Slice0 Dualsubslice 0 Eu Typed Atomics"
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="typed_writes00"
              units="messages"
-             symbol_name="TypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites00 $TypedWrites00 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
+             equation="B 2 READ"
              availability="$SubsliceMask 1 AND"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Untyped Writes 00"
-             description="Slice 0 Dualsubslice 0 untyped writes (including SLM writes)."
+    <counter name="Eu A32 Untyped Reads 00"
+             symbol_name="EuA32UntypedReads00"
+             underscore_name="eu_a32_untyped_reads00"
+             description="Slice0 Dualsubslice 0 Eu A32 Untyped Reads"
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_writes00"
              units="messages"
-             symbol_name="UntypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 00"
-             description="Slice 0 Dualsubslice 0 untyped reads (including SLM reads)."
+    <counter name="Eu A32 Untyped Writes 00"
+             symbol_name="EuA32UntypedWrites00"
+             underscore_name="eu_a32_untyped_writes00"
+             description="Slice0 Dualsubslice 0 Eu A32 Untyped Writes"
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="untyped_reads00"
              units="messages"
-             symbol_name="UntypedReads00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ B 1 READ UADD"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Eu 64 Untyped Reads 00"
+             symbol_name="EuA64UntypedReads00"
+             underscore_name="eu_a64_untyped_reads00"
+             description="Slice0 Dualsubslice 0 Eu 64 Untyped Reads"
+             data_type="float"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ C 1 READ FADD B 6 READ FADD"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="Eu A64 Untyped Writes 00"
+             symbol_name="EuA64UntypedWrites00"
+             underscore_name="eu_a64_untyped_writes00"
              description="Slice0 Dualsubslice 0 Eu A64 Untyped Writes"
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="eu_a64_untyped_writes00"
              units="messages"
-             symbol_name="EuA64UntypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedReadsPerCacheLine"
-             description="The ratio of EU untyped read requests to L3 cache line reads."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             equation="B 0 READ B 5 READ C 1 READ FADD B 6 READ FADD FADD C 4 READ FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 00"
-             description="Slice 0 Dualsubslice 0 typed reads."
-             data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="typed_reads00"
-             units="messages"
-             symbol_name="TypedReads00"
+             equation="$EuTypedAtomics00 $TypedAtomics00 FDIV"
              availability="$SubsliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
              description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuTypedReads00 $TypedReads00 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
-             availability="$SubsliceMask 1 AND"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuTypedReads00 $TypedReads00 FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Eu A32 Untyped Reads 00"
-             description="Slice0 Dualsubslice 0 Eu A32 Untyped Reads"
-             data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_a32_untyped_reads00"
-             units="messages"
-             symbol_name="EuA32UntypedReads00"
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites00 $TypedWrites00 FDIV"
              availability="$SubsliceMask 1 AND"
-             semantic_type="event"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
+             description="The ratio of EU untyped read requests to L3 cache line reads."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="B 0 READ B 5 READ C 1 READ FADD B 6 READ FADD FADD C 4 READ FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="B 7 READ B 1 READ FADD C 2 READ FADD C 3 READ FDIV"
+             availability="$SubsliceMask 1 AND"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="498aa71d-ae5c-4523-a246-33fb2769a386"
        chipset="EHL"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="498aa71d-ae5c-4523-a246-33fb2769a386"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank3."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank0."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 7 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank1."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 6 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
-             units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank2."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 5 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
-             units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank4 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank4."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="l3_bank04_accesses"
-             units="messages"
-             symbol_name="L3Bank04Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="Slice0 L3 Bank5 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank5."
-             data_type="uint64"
-             equation="C 2 READ 2 UMUL"
-             underscore_name="l3_bank05_accesses"
-             units="messages"
-             symbol_name="L3Bank05Accesses"
-             availability="$SliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="Slice0 L3 Bank6 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank6."
-             data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank06_accesses"
-             units="messages"
-             symbol_name="L3Bank06Accesses"
-             availability="$SliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="Slice0 L3 Bank7 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank7."
-             data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank07_accesses"
-             units="messages"
-             symbol_name="L3Bank07Accesses"
-             availability="$SliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="$L3Bank00Accesses $L3Bank01Accesses UADD $L3Bank02Accesses UADD $L3Bank03Accesses UADD $L3Bank04Accesses UADD $L3Bank05Accesses UADD $L3Bank06Accesses UADD $L3Bank07Accesses UADD"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="64  B 7 READ B 6 READ UADD 8 UMUL UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
              description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
-             data_type="uint64"
-             equation="2 B 5 READ B 4 READ UADD UMUL"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
              description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
              units="percent"
-             symbol_name="EuMoveFpu0Instruction"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
-             units="messages"
-             symbol_name="SamplerAccesses"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="$SamplerAccesses 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
+             equation="$SamplerAccesses 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="B 7 READ B 6 READ UADD 8 UMUL A 32 READ UADD"
-             underscore_name="l3_lookups"
-             units="messages"
-             symbol_name="L3Lookups"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 2 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="Slice0 L3 Bank0 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to Slice0 L3 Bank0."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="C 7 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Slice0 L3 Bank1 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to Slice0 L3 Bank1."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
+             equation="C 6 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Slice0 L3 Bank2 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
+             description="The total number of accesses to Slice0 L3 Bank2."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="C 5 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Slice0 L3 Bank3 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to Slice0 L3 Bank3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Slice0 L3 Bank4 Accesses"
+             symbol_name="L3Bank04Accesses"
+             underscore_name="l3_bank04_accesses"
+             description="The total number of accesses to Slice0 L3 Bank4."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 3 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Slice0 L3 Bank5 Accesses"
+             symbol_name="L3Bank05Accesses"
+             underscore_name="l3_bank05_accesses"
+             description="The total number of accesses to Slice0 L3 Bank5."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="C 2 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Slice0 L3 Bank6 Accesses"
+             symbol_name="L3Bank06Accesses"
+             underscore_name="l3_bank06_accesses"
+             description="The total number of accesses to Slice0 L3 Bank6."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Slice0 L3 Bank7 Accesses"
+             symbol_name="L3Bank07Accesses"
+             underscore_name="l3_bank07_accesses"
+             description="The total number of accesses to Slice0 L3 Bank7."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="$L3Bank00Accesses $L3Bank01Accesses UADD $L3Bank02Accesses UADD $L3Bank03Accesses UADD $L3Bank04Accesses UADD $L3Bank05Accesses UADD $L3Bank06Accesses UADD $L3Bank07Accesses UADD"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ B 6 READ UADD 8 UMUL A 32 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="2 B 5 READ B 4 READ UADD UMUL"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
              units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 3 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
-             units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="64  B 7 READ B 6 READ UADD 8 UMUL UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 2 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="045dbb74-1e1a-499c-9a50-bac498e34699"
        chipset="EHL"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="045dbb74-1e1a-499c-9a50-bac498e34699"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="15"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="hi_depth_bottleneck"
              units="percent"
-             symbol_name="HiDepthBottleneck"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="bc_bottleneck"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="10"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="sf_bottleneck"
-             units="percent"
-             symbol_name="SfBottleneck"
-             semantic_type="duration"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
-             units="percent"
-             symbol_name="SfStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             high_watermark="9"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Clipper"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SO Bottleneck"
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="so_bottleneck"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="SO Bottleneck"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
+             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Clipper Bottleneck"
-             low_watermark="10"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
              description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Early Depth Bottleneck"
-             low_watermark="10"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
              description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
+             description="The percentage of time in which stream-output pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SO Stall"
-             description="The percentage of time in which stream-output pipeline stage was stalled."
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="63af815c-30f9-4dd5-81fa-351ad6b69b4b"
        chipset="EHL"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="63af815c-30f9-4dd5-81fa-351ad6b69b4b"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
-             units="percent"
-             symbol_name="GTRequestQueueFull"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1)"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2)"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
              description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
              units="pixels"
-             symbol_name="SamplesKilledInPs"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0)"
-             data_type="float"
-             max_equation="100"
-             equation="C 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             semantic_type="duration"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3)"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader03_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader03AccessStalledOnL3"
-             semantic_type="duration"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
+             units="messages"
+             semantic_type="event"
              equation="A 34 READ"
-             underscore_name="shader_atomics"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader03AccessStalledOnL3"
+             underscore_name="non_sampler_shader03_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="20df0e83-535f-457f-a08b-76aee88c1e0e"
        chipset="EHL"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="20df0e83-535f-457f-a08b-76aee88c1e0e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
-             units="percent"
-             symbol_name="GTRequestQueueFull"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Pipe0 PS Output Available"
-             description="The percentage of time in which slice0 pipe0 PS output is available"
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output00_available"
-             units="percent"
-             symbol_name="PSOutput00Available"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
+             units="threads"
+             semantic_type="event"
              equation="A 5 READ"
-             underscore_name="gs_threads"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Slice0 Pipe1 PS Output Available"
-             description="The percentage of time in which slice0 pipe1 PS output is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output01_available"
              units="percent"
-             symbol_name="PSOutput01Available"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pipe1 Pixel Values Ready"
-             description="The percentage of time in which slice0 pipe1 pixel values are ready"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values01_ready"
              units="percent"
-             symbol_name="PixelValues01Ready"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Pipe0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data00_ready"
              units="percent"
-             symbol_name="PixelData00Ready"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Slice0 Pipe1 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data01_ready"
-             units="percent"
-             symbol_name="PixelData01Ready"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
-             units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice0 Pipe0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pipe0 pixel values are ready"
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values00_ready"
              units="percent"
-             symbol_name="PixelValues00Ready"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Rasterizer"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Rasterizer"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Pipe0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData00Ready"
+             underscore_name="pixel_data00_ready"
+             description="The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
+    <counter name="Slice0 Pipe1 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData01Ready"
+             underscore_name="pixel_data01_ready"
+             description="The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
              units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
+    <counter name="Slice0 Pipe0 PS Output Available"
+             symbol_name="PSOutput00Available"
+             underscore_name="ps_output00_available"
+             description="The percentage of time in which slice0 pipe0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="Slice0 Pipe1 PS Output Available"
+             symbol_name="PSOutput01Available"
+             underscore_name="ps_output01_available"
+             description="The percentage of time in which slice0 pipe1 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="Slice0 Pipe0 Pixel Values Ready"
+             symbol_name="PixelValues00Ready"
+             underscore_name="pixel_values00_ready"
+             description="The percentage of time in which slice0 pipe0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Slice0 Pipe1 Pixel Values Ready"
+             symbol_name="PixelValues01Ready"
+             underscore_name="pixel_values01_ready"
+             description="The percentage of time in which slice0 pipe1 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="c7d3dc5c-975d-4d3a-a4c8-86fb0085743b"
        chipset="EHL"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="c7d3dc5c-975d-4d3a-a4c8-86fb0085743b"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
              units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
              units="percent"
-             symbol_name="GTRequestQueueFull"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice0 L3 Bank5 Active"
-             description="The percentage of time in which slice0 L3 bank5 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank5_active"
-             units="percent"
-             symbol_name="L30Bank5Active"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank6 Active"
-             description="The percentage of time in which slice0 L3 bank6 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank6_active"
-             units="percent"
-             symbol_name="L30Bank6Active"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="Slice0 L3 Bank4 Active"
+             symbol_name="L30Bank4Active"
+             underscore_name="l30_bank4_active"
+             description="The percentage of time in which slice0 L3 bank4 is active"
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 L3 Bank5 Active"
+             symbol_name="L30Bank5Active"
+             underscore_name="l30_bank5_active"
+             description="The percentage of time in which slice0 L3 bank5 is active"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 L3 Bank6 Active"
+             symbol_name="L30Bank6Active"
+             underscore_name="l30_bank6_active"
+             description="The percentage of time in which slice0 L3 bank6 is active"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Slice0 L3 Bank7 Active"
+             symbol_name="L30Bank7Active"
+             underscore_name="l30_bank7_active"
              description="The percentage of time in which slice0 L3 bank7 is active"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank7_active"
              units="percent"
-             symbol_name="L30Bank7Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank4 Active"
-             description="The percentage of time in which slice0 L3 bank4 is active"
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank4_active"
              units="percent"
-             symbol_name="L30Bank4Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank0 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="04003388-30ef-45d6-ae89-86f71ee596f4"
        chipset="EHL"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="04003388-30ef-45d6-ae89-86f71ee596f4"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
              units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank1 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="12f706a4-3761-4448-83dc-63495da010ff"
        chipset="EHL"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="12f706a4-3761-4448-83dc-63495da010ff"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank4 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_4"
-       hw_config_guid="b03ae6f7-fdc0-4879-9e65-5b221ac9625f"
        chipset="EHL"
        symbol_name="L3_4"
+       underscore_name="l3_4"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="b03ae6f7-fdc0-4879-9e65-5b221ac9625f"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Slice0 L3 Bank4 Stalled"
-             description="The percentage of time in which slice0 L3 bank4 is stalled"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank4_stalled"
              units="percent"
-             symbol_name="L30Bank4Stalled"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank4 Stalled"
+             symbol_name="L30Bank4Stalled"
+             underscore_name="l30_bank4_stalled"
+             description="The percentage of time in which slice0 L3 bank4 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank5 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_5"
-       hw_config_guid="cdec4315-3c8d-416f-b3ae-33f7590e1439"
        chipset="EHL"
        symbol_name="L3_5"
+       underscore_name="l3_5"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="cdec4315-3c8d-416f-b3ae-33f7590e1439"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 L3 Bank5 Stalled"
-             description="The percentage of time in which slice0 L3 bank5 is stalled"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ B 3 READ FADD B 0 READ FADD B 1 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank5_stalled"
              units="percent"
-             symbol_name="L30Bank5Stalled"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank5 Stalled"
+             symbol_name="L30Bank5Stalled"
+             underscore_name="l30_bank5_stalled"
+             description="The percentage of time in which slice0 L3 bank5 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 2 READ B 3 READ FADD B 0 READ FADD B 1 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set Sampler 1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_1"
-       hw_config_guid="377c06c4-5f41-465a-bf2e-60d95200ba94"
        chipset="EHL"
        symbol_name="Sampler_1"
+       underscore_name="sampler_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="377c06c4-5f41-465a-bf2e-60d95200ba94"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 4 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
              units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice0 Subslice6 Input Available"
-             description="The percentage of time in which slice0 subslice6 sampler input is available"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler06_input_available"
              units="percent"
-             symbol_name="Sampler06InputAvailable"
-             availability="$SubsliceMask 64 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice4 Input Available"
-             description="The percentage of time in which slice0 subslice4 sampler input is available"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler04_input_available"
              units="percent"
-             symbol_name="Sampler04InputAvailable"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 21 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="Slice0 Subslice3 Input Available"
-             description="The percentage of time in which slice0 subslice3 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler03_input_available"
-             units="percent"
-             symbol_name="Sampler03InputAvailable"
-             availability="$SubsliceMask 8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice5 Input Available"
-             description="The percentage of time in which slice0 subslice5 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler05_input_available"
-             units="percent"
-             symbol_name="Sampler05InputAvailable"
-             availability="$SubsliceMask 32 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice7 Input Available"
-             description="The percentage of time in which slice0 subslice7 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler07_input_available"
-             units="percent"
-             symbol_name="Sampler07InputAvailable"
-             availability="$SubsliceMask 128 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00000D04" value="0x00000200" />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice3 Input Available"
+             symbol_name="Sampler03InputAvailable"
+             underscore_name="sampler03_input_available"
+             description="The percentage of time in which slice0 subslice3 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice4 Input Available"
+             symbol_name="Sampler04InputAvailable"
+             underscore_name="sampler04_input_available"
+             description="The percentage of time in which slice0 subslice4 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice5 Input Available"
+             symbol_name="Sampler05InputAvailable"
+             underscore_name="sampler05_input_available"
+             description="The percentage of time in which slice0 subslice5 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice6 Input Available"
+             symbol_name="Sampler06InputAvailable"
+             underscore_name="sampler06_input_available"
+             description="The percentage of time in which slice0 subslice6 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice7 Input Available"
+             symbol_name="Sampler07InputAvailable"
+             underscore_name="sampler07_input_available"
+             description="The percentage of time in which slice0 subslice7 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00000D04" value="0x00000200" />
         <register type="NOA" address="0x00009840" value="0x00000000" />
         <register type="NOA" address="0x00009884" value="0x00000000" />
         <register type="NOA" address="0x00009888" value="0x142A0165" />
   </set>
 
   <set name="Metric set Sampler 2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_2"
-       hw_config_guid="58326100-b3e7-4554-b612-592e16dc6fd1"
        chipset="EHL"
        symbol_name="Sampler_2"
+       underscore_name="sampler_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="58326100-b3e7-4554-b612-592e16dc6fd1"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
-             units="percent"
-             symbol_name="GTRequestQueueFull"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
              units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Subslice6 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice6 sampler output is ready"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler06_output_ready"
              units="percent"
-             symbol_name="Sampler06OutputReady"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice4 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice4 sampler output is ready"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler04_output_ready"
              units="percent"
-             symbol_name="Sampler04OutputReady"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice0 Subslice3 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice3 sampler output is ready"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler03_output_ready"
              units="percent"
-             symbol_name="Sampler03OutputReady"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice7 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice7 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler07_output_ready"
-             units="percent"
-             symbol_name="Sampler07OutputReady"
-             availability="$SubsliceMask 128 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="Slice0 Subslice5 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice5 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler05_output_ready"
-             units="percent"
-             symbol_name="Sampler05OutputReady"
-             availability="$SubsliceMask 32 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 2 AND"
-             semantic_type="duration"
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice3 Sampler Output Ready"
+             symbol_name="Sampler03OutputReady"
+             underscore_name="sampler03_output_ready"
+             description="The percentage of time in which slice0 subslice3 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice4 Sampler Output Ready"
+             symbol_name="Sampler04OutputReady"
+             underscore_name="sampler04_output_ready"
+             description="The percentage of time in which slice0 subslice4 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice5 Sampler Output Ready"
+             symbol_name="Sampler05OutputReady"
+             underscore_name="sampler05_output_ready"
+             description="The percentage of time in which slice0 subslice5 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice6 Sampler Output Ready"
+             symbol_name="Sampler06OutputReady"
+             underscore_name="sampler06_output_ready"
+             description="The percentage of time in which slice0 subslice6 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice7 Sampler Output Ready"
+             symbol_name="Sampler07OutputReady"
+             underscore_name="sampler07_output_ready"
+             description="The percentage of time in which slice0 subslice7 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="4c71735a-dfbf-4ebe-9df3-6c3db344a466"
        chipset="EHL"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="4c71735a-dfbf-4ebe-9df3-6c3db344a466"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="A 13 READ A 1 READ UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="cycles"
              semantic_type="event"
+             equation="A 14 READ A 1 READ UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="cycles"
              semantic_type="event"
+             equation="A 19 READ A 6 READ UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
+    <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
+             description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread03_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread03ReadyForDispatch"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="cycles"
              semantic_type="event"
+             equation="A 20 READ A 6 READ UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 20 READ A 6 READ UDIV"
-             underscore_name="ps_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="PsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 14 READ A 1 READ UDIV"
-             underscore_name="vs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="VsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 19 READ A 6 READ UDIV"
-             underscore_name="ps_eu_active_per_thread"
-             units="cycles"
-             symbol_name="PsEuActivePerThread"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
-             units="percent"
-             symbol_name="PsEuActive"
-             semantic_type="duration"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 34 READ"
              mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread07_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread07ReadyForDispatch"
-             availability="$SubsliceMask 128 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="FS EU Stall"
-             description="The percentage of time in which fragment shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
-             units="percent"
-             symbol_name="PsEuStall"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread05_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread05ReadyForDispatch"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
              units="percent"
-             symbol_name="VsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
+             symbol_name="PSThread03ReadyForDispatch"
+             underscore_name="ps_thread03_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher"
+             symbol_name="PSThread04ReadyForDispatch"
+             underscore_name="ps_thread04_ready_for_dispatch"
              description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread04_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread04ReadyForDispatch"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
+             symbol_name="PSThread05ReadyForDispatch"
+             underscore_name="ps_thread05_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
              units="percent"
-             symbol_name="VsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
-             data_type="uint64"
-             equation="A 13 READ A 1 READ UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
+             symbol_name="PSThread06ReadyForDispatch"
+             underscore_name="ps_thread06_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
+             symbol_name="PSThread07ReadyForDispatch"
+             underscore_name="ps_thread07_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread06_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread06ReadyForDispatch"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="80e75f86-f8bc-4903-bf0f-38fd26cca636"
        chipset="EHL"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="80e75f86-f8bc-4903-bf0f-38fd26cca636"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
-             units="percent"
-             symbol_name="GTRequestQueueFull"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="GPU_CLOCK 0 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 4 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="GPU_CLOCK 0 READ C 2 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread03_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread03ReadyForDispatch"
-             availability="$SubsliceMask 8 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 6 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread07_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread07ReadyForDispatch"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread06_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread06ReadyForDispatch"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 0 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
+             symbol_name="NonPSThread03ReadyForDispatch"
+             underscore_name="non_ps_thread03_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 4 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread05_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread05ReadyForDispatch"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ C 2 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher"
+             symbol_name="NonPSThread04ReadyForDispatch"
+             underscore_name="non_ps_thread04_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread04_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread04ReadyForDispatch"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
+             symbol_name="NonPSThread05ReadyForDispatch"
+             underscore_name="non_ps_thread05_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 4 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
+             symbol_name="NonPSThread06ReadyForDispatch"
+             underscore_name="non_ps_thread06_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
+             symbol_name="NonPSThread07ReadyForDispatch"
+             underscore_name="non_ps_thread07_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 6 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 0 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
-             availability="$SubsliceMask 2 AND"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set TDL_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_3"
-       hw_config_guid="9c517add-e263-4cbe-b7f8-9986f737e68a"
        chipset="EHL"
        symbol_name="TDL_3"
+       underscore_name="tdl_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="9c517add-e263-4cbe-b7f8-9986f737e68a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 1"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header06_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader06ReadyPort1"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 0"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader03ReadyPort0"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 1"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader03ReadyPort1"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 1"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort1"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 0"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort0"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header06_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader06ReadyPort0"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header07_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader07ReadyPort0"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
              description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader03ReadyPort0"
+             underscore_name="thread_header03_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort0"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader03ReadyPort1"
+             underscore_name="thread_header03_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader04ReadyPort0"
+             underscore_name="thread_header04_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header07_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader07ReadyPort1"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader04ReadyPort1"
+             underscore_name="thread_header04_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader05ReadyPort0"
+             underscore_name="thread_header05_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader05ReadyPort1"
+             underscore_name="thread_header05_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort1"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader06ReadyPort0"
+             underscore_name="thread_header06_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader06ReadyPort1"
+             underscore_name="thread_header06_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader07ReadyPort0"
+             underscore_name="thread_header07_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader07ReadyPort1"
+             underscore_name="thread_header07_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="c3103887-8c7c-482a-b923-3d68dd340598"
        chipset="EHL"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="c3103887-8c7c-482a-b923-3d68dd340598"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Posh Ring Busy"
-             description="The percentage of time when posh command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="posh_engine_busy"
              units="percent"
-             symbol_name="PoshEngineBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
              description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Posh Ring Busy"
+             symbol_name="PoshEngineBusy"
+             underscore_name="posh_engine_busy"
+             description="The percentage of time when posh command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TestOa"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="82b6b321-f93e-445a-a285-f30391ef1dc5"
        chipset="EHL"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="82b6b321-f93e-445a-a285-f30391ef1dc5"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.6666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.3333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.3333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.16666"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.3333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.3333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.16666"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.6666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
index e3da757..a10d701 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1522878594" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="d72df5c7-5b4a-4274-a43f-00b0fd51fc68"
        chipset="GLK"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d72df5c7-5b4a-4274-a43f-00b0fd51fc68"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
+             equation="B 4 READ 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
              description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
              mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="814285f6-354d-41d2-ba49-e24e622714a0"
        chipset="GLK"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="814285f6-354d-41d2-ba49-e24e622714a0"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="07d397a6-b3e6-49f6-9433-a4f293d55978"
        chipset="GLK"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="07d397a6-b3e6-49f6-9433-a4f293d55978"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="VsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             high_watermark="10"
              data_type="float"
-             high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="1a356946-5428-450b-a2f0-89f8783a302d"
        chipset="GLK"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="1a356946-5428-450b-a2f0-89f8783a302d"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
-             semantic_type="event"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
              description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
              mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="5299be9d-7a61-4c99-9f81-f87e6c5aaca9"
        chipset="GLK"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5299be9d-7a61-4c99-9f81-f87e6c5aaca9"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="bc9bcff2-459a-4cbc-986d-a84b077153f3"
        chipset="GLK"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="bc9bcff2-459a-4cbc-986d-a84b077153f3"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 7 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="88ec931f-5b4a-453a-9db6-a61232b6143d"
        chipset="GLK"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="88ec931f-5b4a-453a-9db6-a61232b6143d"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
-             units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
              description="The total number of accesses to L3 Bank 01."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
+             description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Bank 02 Accesses"
-             description="The total number of accesses to L3 Bank 02."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
              units="bytes"
-             symbol_name="L3TotalThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="C 6 READ 64 UMUL"
              mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
         <register type="NOA" address="0x00009888" value="0x166C03B0" />
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="530d176d-2a18-4014-adf8-1500c6c60835"
        chipset="GLK"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="530d176d-2a18-4014-adf8-1500c6c60835"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
+             symbol_name="SamplesWritten"
              underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
         <register type="FLEX" address="0x0000E45C" value="0x00051050" />
         <register type="FLEX" address="0x0000E55C" value="0x00053052" />
         <register type="FLEX" address="0x0000E65C" value="0x00055054" />
-    </register_config>
-  </set>
-
-  <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="fdee5a5a-f23c-43d1-aa73-f6257c71671d"
-       chipset="GLK"
-       symbol_name="L3_1"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+    </register_config>
+  </set>
+
+  <set name="Metric set L3_1"
+       chipset="GLK"
+       symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="fdee5a5a-f23c-43d1-aa73-f6257c71671d"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
              units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="6617623e-ca73-4791-b2b7-ddedd0846a0c"
        chipset="GLK"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="6617623e-ca73-4791-b2b7-ddedd0846a0c"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="f3b2ea63-e82e-4234-b418-44dd20dd34d0"
        chipset="GLK"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f3b2ea63-e82e-4234-b418-44dd20dd34d0"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
              units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="14411d35-cbf6-4f5e-b68b-190faf9a1a83"
        chipset="GLK"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="14411d35-cbf6-4f5e-b68b-190faf9a1a83"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
     <register_config type="FLEX">
         <register type="FLEX" address="0x0000E458" value="0x00005004" />
         <register type="FLEX" address="0x0000E558" value="0x00010003" />
-        <register type="FLEX" address="0x0000E658" value="0x00012011" />
-        <register type="FLEX" address="0x0000E758" value="0x00015014" />
-        <register type="FLEX" address="0x0000E45C" value="0x00051050" />
-        <register type="FLEX" address="0x0000E55C" value="0x00053052" />
-        <register type="FLEX" address="0x0000E65C" value="0x00055054" />
-    </register_config>
-  </set>
-
-  <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="ffa3f263-0478-4724-8c9f-c911c5ec0f1d"
-       chipset="GLK"
-       symbol_name="TDL_2"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+        <register type="FLEX" address="0x0000E658" value="0x00012011" />
+        <register type="FLEX" address="0x0000E758" value="0x00015014" />
+        <register type="FLEX" address="0x0000E45C" value="0x00051050" />
+        <register type="FLEX" address="0x0000E55C" value="0x00053052" />
+        <register type="FLEX" address="0x0000E65C" value="0x00055054" />
+    </register_config>
+  </set>
+
+  <set name="Metric set TDL_2"
+       chipset="GLK"
+       symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="ffa3f263-0478-4724-8c9f-c911c5ec0f1d"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="15274c82-27d2-4819-876a-7cb1a2c59ba4"
        chipset="GLK"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="15274c82-27d2-4819-876a-7cb1a2c59ba4"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
-             units="percent"
-             symbol_name="Fpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="dd3fd789-e783-4204-8cd0-b671bbccb0cf"
        chipset="GLK"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="dd3fd789-e783-4204-8cd0-b671bbccb0cf"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="e6868953-fb47-431d-a060-f785916558fc"
        chipset="GLK"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="e6868953-fb47-431d-a060-f785916558fc"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index a3bed73..80bb7a3 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1520266058" merge_md5="">
   <set name="Render Metrics Basic Gen7.5"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="403d8832-1a27-4aa6-a64e-f5389ce7b212"
        chipset="HSW"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="403d8832-1a27-4aa6-a64e-f5389ce7b212"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="C 2 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TES EU Stall"
-             description="The percentage of time in which evaluation shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_stall"
-             units="percent"
-             symbol_name="DsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Alpha Test Fails"
-             description="The total number of pixels dropped on post-FS alpha test."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 37 READ"
-             underscore_name="alpha_test_fails"
-             units="pixels"
-             symbol_name="AlphaTestFails"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which sampler 1 was bottlenecks."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="sampler1_bottleneck"
-             units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TES Threads Dispatched"
-             description="The total number of evaluation shader hardware threads dispatched."
+    <counter name="TCS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of control shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 15 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 10 READ"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
-    <counter name="TES AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+    <counter name="TES Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of evaluation shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="100"
-             equation="A 12 READ $DsThreads UDIV"
-             underscore_name="ds_eu_active_per_thread"
-             units="cycles"
-             symbol_name="DsEuActivePerThread"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 15 READ"
              mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 25 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 30 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GS EU Stall"
-             description="The percentage of time in which geometry shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_stall"
-             units="percent"
-             symbol_name="GsEuStall"
-             semantic_type="duration"
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 20 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Active"
-             description="The percentage of time in which compute shaders were processed actively on the EUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has being processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_active"
              units="percent"
-             symbol_name="CsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
              units="percent"
-             symbol_name="VsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS EU Active"
-             description="The percentage of time in which control shaders were processed actively on the EUs."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_active"
              units="percent"
-             symbol_name="HsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TES EU Active"
-             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_active"
              units="percent"
-             symbol_name="DsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS EU Active"
-             description="The percentage of time in which geometry shaders were processed actively on the EUs."
-             data_type="float"
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+             data_type="uint64"
              max_equation="100"
-             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_active"
-             units="percent"
-             symbol_name="GsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             units="cycles"
+             semantic_type="event"
+             equation="A 2 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+    <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 3 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
              units="percent"
-             symbol_name="PsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Stall"
-             description="The percentage of time in which compute shaders were stalled on the EUs."
+    <counter name="TCS EU Active"
+             symbol_name="HsEuActive"
+             underscore_name="hs_eu_active"
+             description="The percentage of time in which control shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_stall"
              units="percent"
-             symbol_name="CsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TCS AVG Active per Thread"
+             symbol_name="HsEuActivePerThread"
+             underscore_name="hs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
+             data_type="uint64"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 7 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TCS AVG Stall per Thread"
+             symbol_name="HsEuStallPerThread"
+             underscore_name="hs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 8 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TCS EU Stall"
+             symbol_name="HsEuStall"
+             underscore_name="hs_eu_stall"
+             description="The percentage of time in which control shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
+    <counter name="TES EU Active"
+             symbol_name="DsEuActive"
+             underscore_name="ds_eu_active"
+             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
              units="percent"
-             symbol_name="VsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS EU Stall"
-             description="The percentage of time in which control shaders were stalled on the EUs."
+    <counter name="TES AVG Active per Thread"
+             symbol_name="DsEuActivePerThread"
+             underscore_name="ds_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+             data_type="uint64"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 12 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TES AVG Stall per Thread"
+             symbol_name="DsEuStallPerThread"
+             underscore_name="ds_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 13 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TES EU Stall"
+             symbol_name="DsEuStall"
+             underscore_name="ds_eu_stall"
+             description="The percentage of time in which evaluation shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_stall"
              units="percent"
-             symbol_name="HsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS EU Stall"
-             description="The percentage of time in which fragment shaders were stalled on the EUs."
+    <counter name="GS EU Active"
+             symbol_name="GsEuActive"
+             underscore_name="gs_eu_active"
+             description="The percentage of time in which geometry shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
              units="percent"
-             symbol_name="PsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="GS AVG Active per Thread"
+             symbol_name="GsEuActivePerThread"
+             underscore_name="gs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 22 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="CS Duration"
-             description="Total Compute Shader GPU duration."
+    <counter name="GS AVG Stall per Thread"
+             symbol_name="GsEuStallPerThread"
+             underscore_name="gs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="cs_duration"
-             units="us"
-             symbol_name="CsDuration"
+             units="cycles"
+             semantic_type="event"
+             equation="A 23 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Stall"
+             symbol_name="GsEuStall"
+             underscore_name="gs_eu_stall"
+             description="The percentage of time in which geometry shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS EU Active"
+             symbol_name="CsEuActive"
+             underscore_name="cs_eu_active"
+             description="The percentage of time in which compute shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="CS AVG Active per Thread"
+             symbol_name="CsEuActivePerThread"
+             underscore_name="cs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             max_equation="100"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS AVG Stall per Thread"
+             symbol_name="CsEuStallPerThread"
+             underscore_name="cs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 30 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 18 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Sampler 0 Busy"
-             description="The percentage of time in which sampler 0 was busy."
+    <counter name="CS EU Stall"
+             symbol_name="CsEuStall"
+             underscore_name="cs_eu_stall"
+             description="The percentage of time in which compute shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+             data_type="uint64"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 27 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 28 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
+             description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
+             semantic_type="duration"
+             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler 0 Busy"
              symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x1 AND"
+             underscore_name="sampler0_busy"
+             description="The percentage of time in which sampler 0 was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which sampler 1 was busy."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers were busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="TES Duration"
-             description="Total Evaluation Shader GPU duration."
-             data_type="uint64"
-             equation="A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="ds_duration"
-             units="us"
-             symbol_name="DsDuration"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 1 READ 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 128 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 20 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="CS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
-             data_type="uint64"
-             max_equation="100"
-             equation="A 17 READ $CsThreads UDIV"
-             underscore_name="cs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="CsEuActivePerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which sampler 0 was bottlenecks."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="GS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
-             data_type="uint64"
-             equation="A 23 READ $GsThreads UDIV"
-             underscore_name="gs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="GsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which sampler 1 was bottlenecks."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 0 Texels LOD0"
+             symbol_name="Sampler0Texels"
+             underscore_name="sampler0_texels"
              description="The total number of texels lookups in LOD0 in sampler 0 unit."
              data_type="uint64"
-             equation="B 4 READ 4 UMUL"
-             underscore_name="sampler0_texels"
              units="texels"
-             symbol_name="Sampler0Texels"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ 4 UMUL"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="Sampler"
              />
     <counter name="Sampler 1 Texels LOD0"
+             symbol_name="Sampler1Texels"
+             underscore_name="sampler1_texels"
              description="The total number of texels lookups in LOD0 in sampler 1 unit."
              data_type="uint64"
-             equation="B 5 READ 4 UMUL"
-             underscore_name="sampler1_texels"
              units="texels"
-             symbol_name="Sampler1Texels"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ 4 UMUL"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="Sampler"
              />
     <counter name="Sampler Texels LOD0"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
              description="The total number of texels lookups in LOD0 in all sampler units."
              data_type="uint64"
-             equation="$Sampler0Texels $Sampler1Texels UADD $EuSlicesTotalCount UMUL"
-             underscore_name="sampler_texels"
              units="texels"
-             symbol_name="SamplerTexels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$Sampler0Texels $Sampler1Texels UADD $EuSlicesTotalCount UMUL"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="GS Duration"
-             description="Total Geometry Shader GPU duration."
-             data_type="uint64"
-             equation="A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="gs_duration"
-             units="us"
-             symbol_name="GsDuration"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Idle"
-             description="The percentage of time in which the Execution Units were idle."
-             data_type="float"
-             max_equation="100"
-             equation="100 $EuActive $EuStall FADD FSUB"
-             underscore_name="eu_idle"
-             units="percent"
-             symbol_name="EuIdle"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 6 READ B 7 READ UADD 2 UMUL $EuSlicesTotalCount UMUL 64 UMUL"
+             mdapi_group="L3/Sampler"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
-             units="bytes"
-             symbol_name="GtiWriteThroughput"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
-             data_type="uint64"
-             equation="A 28 READ $PsThreads UDIV"
-             underscore_name="ps_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="PsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
-             data_type="uint64"
-             equation="A 3 READ $VsThreads UDIV"
-             underscore_name="vs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="VsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="C 5 READ $EuSlicesTotalCount 4 UMUL UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has being processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
-             data_type="uint64"
-             max_equation="100"
-             equation="A 27 READ $PsThreads UDIV"
-             underscore_name="ps_eu_active_per_thread"
-             units="cycles"
-             symbol_name="PsEuActivePerThread"
              semantic_type="event"
+             equation="A 33 READ"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
              description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 35 READ"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="TCS Duration"
-             description="Total Control Shader GPU duration."
-             data_type="uint64"
-             equation="A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="hs_duration"
-             units="us"
-             symbol_name="HsDuration"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
-             />
-    <counter name="TES AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
-             data_type="uint64"
-             equation="A 13 READ $DsThreads UDIV"
-             underscore_name="ds_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="DsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
-             />
-    <counter name="GS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
-             data_type="uint64"
-             max_equation="100"
-             equation="A 22 READ $GsThreads UDIV"
-             underscore_name="gs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="GsEuActivePerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS Threads Dispatched"
-             description="The total number of control shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 10 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 36 READ"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+    <counter name="Alpha Test Fails"
+             symbol_name="AlphaTestFails"
+             underscore_name="alpha_test_fails"
+             description="The total number of pixels dropped on post-FS alpha test."
              data_type="uint64"
-             equation="A 8 READ $HsThreads UDIV"
-             underscore_name="hs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="HsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 37 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Late Stencil Test Fails"
+             symbol_name="PostPsStencilTestFails"
+             underscore_name="post_ps_stencil_test_fails"
+             description="The total number of pixels dropped on post-FS stencil test."
              data_type="uint64"
-             equation="A 36 READ"
-             underscore_name="samples_killed_in_ps"
              units="pixels"
-             symbol_name="SamplesKilledInPs"
              semantic_type="event"
+             equation="A 38 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Late Depth Test Fails"
+             symbol_name="PostPsDepthTestFails"
+             underscore_name="post_ps_depth_test_fails"
              description="The total number of pixels dropped on post-FS depth test."
              data_type="uint64"
-             equation="A 39 READ $SamplesKilledInPs USUB"
-             underscore_name="post_ps_depth_test_fails"
              units="pixels"
-             symbol_name="PostPsDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 39 READ $SamplesKilledInPs USUB"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Sampler Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers were bottlenecks."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="100"
-             equation="A 7 READ $HsThreads UDIV"
-             underscore_name="hs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="HsEuActivePerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS Duration"
-             description="Total Fragment Shader GPU duration."
-             data_type="uint64"
-             equation="A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="ps_duration"
-             units="us"
-             symbol_name="PsDuration"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="A 40 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 33 READ"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
+             equation="C 5 READ $EuSlicesTotalCount 4 UMUL UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="CS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             equation="A 18 READ $CsThreads UDIV"
-             underscore_name="cs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="CsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 1 READ 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Late Stencil Test Fails"
-             description="The total number of pixels dropped on post-FS stencil test."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
-             equation="A 38 READ"
-             underscore_name="post_ps_stencil_test_fails"
-             units="pixels"
-             symbol_name="PostPsStencilTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 0 READ 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
              description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 3 READ 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 3 READ 64 UMUL"
              mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ UADD 2 UMUL $EuSlicesTotalCount UMUL 64 UMUL"
-             underscore_name="l3_sampler_throughput"
              units="bytes"
-             symbol_name="L3SamplerThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="100"
-             equation="A 2 READ $VsThreads UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 6 READ 128 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Duration"
+             symbol_name="PsDuration"
+             underscore_name="ps_duration"
+             description="Total Fragment Shader GPU duration."
+             data_type="uint64"
+             units="us"
+             semantic_type="duration"
+             equation="A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Duration"
+             symbol_name="VsDuration"
+             underscore_name="vs_duration"
              description="Total Vertex Shader GPU duration."
              data_type="uint64"
+             units="us"
+             semantic_type="duration"
              equation="A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="vs_duration"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Duration"
+             symbol_name="GsDuration"
+             underscore_name="gs_duration"
+             description="Total Geometry Shader GPU duration."
+             data_type="uint64"
              units="us"
-             symbol_name="VsDuration"
              semantic_type="duration"
+             equation="A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL4"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TES Duration"
+             symbol_name="DsDuration"
+             underscore_name="ds_duration"
+             description="Total Evaluation Shader GPU duration."
+             data_type="uint64"
+             units="us"
+             semantic_type="duration"
+             equation="A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="TCS Duration"
+             symbol_name="HsDuration"
+             underscore_name="hs_duration"
+             description="Total Control Shader GPU duration."
              data_type="uint64"
-             equation="A 40 READ"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             units="us"
+             semantic_type="duration"
+             equation="A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Duration"
+             symbol_name="CsDuration"
+             underscore_name="cs_duration"
+             description="Total Compute Shader GPU duration."
+             data_type="uint64"
+             units="us"
+             semantic_type="duration"
+             equation="A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers were bottlenecks."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Idle"
+             symbol_name="EuIdle"
+             underscore_name="eu_idle"
+             description="The percentage of time in which the Execution Units were idle."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="100 $EuActive $EuStall FADD FSUB"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen7.5"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="39ad14bc-2380-45c4-91eb-fbcb3aa7ae7b"
        chipset="HSW"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="39ad14bc-2380-45c4-91eb-fbcb3aa7ae7b"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="TES EU Stall"
-             description="The percentage of time in which evaluation shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_stall"
-             units="percent"
-             symbol_name="DsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Alpha Test Fails"
-             description="The total number of pixels dropped on post-FS alpha test."
-             data_type="uint64"
-             equation="A 37 READ"
-             underscore_name="alpha_test_fails"
-             units="pixels"
-             symbol_name="AlphaTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="TCS Threads Dispatched"
-             description="The total number of control shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 10 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             max_equation="100"
-             equation="A 7 READ $HsThreads UDIV"
-             underscore_name="hs_eu_active_per_thread"
              units="cycles"
-             symbol_name="HsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             equation="C 2 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 25 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GS EU Stall"
-             description="The percentage of time in which geometry shaders were stalled on the EUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has being processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_stall"
              units="percent"
-             symbol_name="GsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="TCS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of control shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 30 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 10 READ"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="TES Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of evaluation shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 20 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 15 READ"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="CS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="100"
-             equation="A 17 READ $CsThreads UDIV"
-             underscore_name="cs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="CsEuActivePerThread"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 25 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 23 READ $GsThreads UDIV"
-             underscore_name="gs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="GsEuStallPerThread"
+             units="threads"
              semantic_type="event"
+             equation="A 30 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Typed Atomics"
-             description="The total number of typed atomics."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ C 1 READ UADD $EuSlicesTotalCount UMUL"
-             underscore_name="typed_atomics"
-             units="messages"
-             symbol_name="TypedAtomics"
+             units="threads"
              semantic_type="event"
+             equation="A 20 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 6 READ C 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 28 READ $PsThreads UDIV"
-             underscore_name="ps_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="PsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="TES EU Active"
-             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_active"
              units="percent"
-             symbol_name="DsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
              description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 3 READ $VsThreads UDIV"
-             underscore_name="vs_eu_stall_per_thread"
              units="cycles"
-             symbol_name="VsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 3 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="TCS EU Stall"
-             description="The percentage of time in which control shaders were stalled on the EUs."
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_stall"
              units="percent"
-             symbol_name="HsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has being processing GPU commands."
+    <counter name="TCS EU Active"
+             symbol_name="HsEuActive"
+             underscore_name="hs_eu_active"
+             description="The percentage of time in which control shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+    <counter name="TCS AVG Active per Thread"
+             symbol_name="HsEuActivePerThread"
+             underscore_name="hs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 27 READ $PsThreads UDIV"
-             underscore_name="ps_eu_active_per_thread"
              units="cycles"
-             symbol_name="PsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 7 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="TCS EU Stall"
+             symbol_name="HsEuStall"
+             underscore_name="hs_eu_stall"
+             description="The percentage of time in which control shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="TES Threads Dispatched"
-             description="The total number of evaluation shader hardware threads dispatched."
+    <counter name="TCS AVG Stall per Thread"
+             symbol_name="HsEuStallPerThread"
+             underscore_name="hs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 15 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="A 8 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TES EU Active"
+             symbol_name="DsEuActive"
+             underscore_name="ds_eu_active"
+             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="TES AVG Active per Thread"
+             symbol_name="DsEuActivePerThread"
+             underscore_name="ds_eu_active_per_thread"
              description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 12 READ $DsThreads UDIV"
-             underscore_name="ds_eu_active_per_thread"
              units="cycles"
-             symbol_name="DsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 12 READ $DsThreads UDIV"
              mdapi_group="EU Array/Evaluation Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS EU Active"
-             description="The percentage of time in which geometry shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_active"
-             units="percent"
-             symbol_name="GsEuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+    <counter name="TES EU Stall"
+             symbol_name="DsEuStall"
+             underscore_name="ds_eu_stall"
+             description="The percentage of time in which evaluation shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
              units="percent"
-             symbol_name="PsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="TES AVG Stall per Thread"
+             symbol_name="DsEuStallPerThread"
+             underscore_name="ds_eu_stall_per_thread"
              description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 13 READ $DsThreads UDIV"
-             underscore_name="ds_eu_stall_per_thread"
              units="cycles"
-             symbol_name="DsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 13 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Active"
+             symbol_name="GsEuActive"
+             underscore_name="gs_eu_active"
+             description="The percentage of time in which geometry shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
     <counter name="GS AVG Active per Thread"
+             symbol_name="GsEuActivePerThread"
+             underscore_name="gs_eu_active_per_thread"
              description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 22 READ $GsThreads UDIV"
-             underscore_name="gs_eu_active_per_thread"
              units="cycles"
-             symbol_name="GsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 22 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Stall"
+             symbol_name="GsEuStall"
+             underscore_name="gs_eu_stall"
+             description="The percentage of time in which geometry shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="GS AVG Stall per Thread"
+             symbol_name="GsEuStallPerThread"
+             underscore_name="gs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 2 READ B 3 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
-             units="bytes"
-             symbol_name="UntypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Draw"
-             mdapi_group="L3/Data Port"
+             units="cycles"
+             semantic_type="event"
+             equation="A 23 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of byten written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="CS EU Active"
+             symbol_name="CsEuActive"
+             underscore_name="cs_eu_active"
+             description="The percentage of time in which compute shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+    <counter name="CS AVG Active per Thread"
+             symbol_name="CsEuActivePerThread"
+             underscore_name="cs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 8 READ $HsThreads UDIV"
-             underscore_name="hs_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="HsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 17 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS EU Active"
-             description="The percentage of time in which control shaders were processed actively on the EUs."
+    <counter name="CS EU Stall"
+             symbol_name="CsEuStall"
+             underscore_name="cs_eu_stall"
+             description="The percentage of time in which compute shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_active"
              units="percent"
-             symbol_name="HsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="CS AVG Stall per Thread"
+             symbol_name="CsEuStallPerThread"
+             underscore_name="cs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 36 READ"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 18 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Late Depth Test Fails"
-             description="The total number of pixels dropped on post-FS depth test."
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 39 READ $SamplesKilledInPs USUB"
-             underscore_name="post_ps_depth_test_fails"
-             units="pixels"
-             symbol_name="PostPsDepthTestFails"
+             max_equation="100"
+             units="cycles"
              semantic_type="event"
+             equation="A 27 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
              description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
              units="percent"
-             symbol_name="PsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 28 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 33 READ"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 33 READ"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
-             units="percent"
-             symbol_name="VsEuActive"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Active"
-             description="The percentage of time in which compute shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_active"
-             units="percent"
-             symbol_name="CsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 36 READ"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
+    <counter name="Alpha Test Fails"
+             symbol_name="AlphaTestFails"
+             underscore_name="alpha_test_fails"
+             description="The total number of pixels dropped on post-FS alpha test."
              data_type="uint64"
-             equation="A 18 READ $CsThreads UDIV"
-             underscore_name="cs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="CsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 37 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Late Stencil Test Fails"
+             symbol_name="PostPsStencilTestFails"
+             underscore_name="post_ps_stencil_test_fails"
              description="The total number of pixels dropped on post-FS stencil test."
              data_type="uint64"
-             equation="A 38 READ"
-             underscore_name="post_ps_stencil_test_fails"
              units="pixels"
-             symbol_name="PostPsStencilTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 38 READ"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Late Depth Test Fails"
+             symbol_name="PostPsDepthTestFails"
+             underscore_name="post_ps_depth_test_fails"
+             description="The total number of pixels dropped on post-FS depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 39 READ $SamplesKilledInPs USUB"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="100"
-             equation="A 2 READ $VsThreads UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
+             units="pixels"
              semantic_type="event"
+             equation="A 40 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Stall"
-             description="The percentage of time in which compute shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_stall"
-             units="percent"
-             symbol_name="CsEuStall"
-             semantic_type="duration"
+    <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
+             description="The total number of typed memory bytes read via Data Port."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 4 READ B 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Typed Atomics"
+             symbol_name="TypedAtomics"
+             underscore_name="typed_atomics"
+             description="The total number of typed atomics."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 0 READ C 1 READ UADD $EuSlicesTotalCount UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 2 READ B 3 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 4 READ B 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
+             equation="C 6 READ C 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of bytes written into shared local memory."
              data_type="uint64"
-             equation="A 40 READ"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
-             units="percent"
-             symbol_name="VsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extended Gen7.5"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="3865be28-6982-49fe-9494-e4d1b4795413"
        chipset="HSW"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="3865be28-6982-49fe-9494-e4d1b4795413"
        >
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
-             units="messages"
-             symbol_name="EuUntypedWrites0"
-             semantic_type="event"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuClocks"
+             underscore_name="gpu_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
-             units="messages"
-             symbol_name="UntypedWrites0"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             equation="B 7 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
+             units="threads"
              semantic_type="event"
+             equation="A 20 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
+    <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
+             description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
              units="messages"
-             symbol_name="TypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
              units="messages"
-             symbol_name="EuTypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 1 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedReads0"
-             description="The subslice 0 EU Untyped Reads subslice 0."
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gpu_clocks"
-             units="cycles"
-             symbol_name="GpuClocks"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="EuUrbAtomics0"
+             symbol_name="EuUrbAtomics0"
+             underscore_name="eu_urb_atomics0"
+             description="The subslice 0 EU URB Atomics subslice 0."
              data_type="uint64"
-             equation="A 20 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
              units="messages"
-             symbol_name="EuTypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUrbAtomics0"
-             description="The subslice 0 EU URB Atomics subslice 0."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_urb_atomics0"
              units="messages"
-             symbol_name="EuUrbAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuUntypedReads0 $UntypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuUntypedWrites0 $UntypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen7.5"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="bb5ed49b-2497-4095-94f6-26ba294db88a"
        chipset="HSW"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="bb5ed49b-2497-4095-94f6-26ba294db88a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="TES EU Stall"
-             description="The percentage of time in which evaluation shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_stall"
-             units="percent"
-             symbol_name="DsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Alpha Test Fails"
-             description="The total number of pixels dropped on post-FS alpha test."
-             data_type="uint64"
-             equation="A 37 READ"
-             underscore_name="alpha_test_fails"
-             units="pixels"
-             symbol_name="AlphaTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="TES Threads Dispatched"
-             description="The total number of evaluation shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 15 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
-    <counter name="TES AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             max_equation="100"
-             equation="A 12 READ $DsThreads UDIV"
-             underscore_name="ds_eu_active_per_thread"
              units="cycles"
-             symbol_name="DsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+             equation="C 7 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 25 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GS EU Stall"
-             description="The percentage of time in which geometry shaders were stalled on the EUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has being processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_stall"
              units="percent"
-             symbol_name="GsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 5 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="LLC GPU Read Accesses"
-             description="The total number of LLC cache lookups for reads done from the GPU."
-             data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="llc_read_accesses"
-             units="messages"
-             symbol_name="LlcReadAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="LLC"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="TCS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of control shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 30 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads (64B each)."
-             data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_memory_reads"
-             units="messages"
-             symbol_name="GtiMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="TES Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of evaluation shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 20 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 15 READ"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="CS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="100"
-             equation="A 17 READ $CsThreads UDIV"
-             underscore_name="cs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="CsEuActivePerThread"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 25 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
-             units="messages"
-             symbol_name="GtiRczMemoryReads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 30 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 23 READ $GsThreads UDIV"
-             underscore_name="gs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="GsEuStallPerThread"
+             units="threads"
              semantic_type="event"
+             equation="A 20 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 28 READ $PsThreads UDIV"
-             underscore_name="ps_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="PsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="TES EU Active"
-             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_active"
              units="percent"
-             symbol_name="DsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
              description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 3 READ $VsThreads UDIV"
-             underscore_name="vs_eu_stall_per_thread"
              units="cycles"
-             symbol_name="VsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="TCS EU Stall"
-             description="The percentage of time in which control shaders were stalled on the EUs."
+    <counter name="TCS EU Active"
+             symbol_name="HsEuActive"
+             underscore_name="hs_eu_active"
+             description="The percentage of time in which control shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_stall"
              units="percent"
-             symbol_name="HsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has being processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+    <counter name="TCS AVG Active per Thread"
+             symbol_name="HsEuActivePerThread"
+             underscore_name="hs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 27 READ $PsThreads UDIV"
-             underscore_name="ps_eu_active_per_thread"
              units="cycles"
-             symbol_name="PsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 7 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
-             data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+    <counter name="TCS EU Stall"
+             symbol_name="HsEuStall"
+             underscore_name="hs_eu_stall"
+             description="The percentage of time in which control shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="TCS AVG Stall per Thread"
+             symbol_name="HsEuStallPerThread"
+             underscore_name="hs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 8 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GS EU Active"
-             description="The percentage of time in which geometry shaders were processed actively on the EUs."
+    <counter name="TES EU Active"
+             symbol_name="DsEuActive"
+             underscore_name="ds_eu_active"
+             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_active"
              units="percent"
-             symbol_name="GsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+    <counter name="TES AVG Active per Thread"
+             symbol_name="DsEuActivePerThread"
+             underscore_name="ds_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+             data_type="uint64"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 12 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TES EU Stall"
+             symbol_name="DsEuStall"
+             underscore_name="ds_eu_stall"
+             description="The percentage of time in which evaluation shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
              units="percent"
-             symbol_name="PsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="TES AVG Stall per Thread"
+             symbol_name="DsEuStallPerThread"
+             underscore_name="ds_eu_stall_per_thread"
              description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 13 READ $DsThreads UDIV"
-             underscore_name="ds_eu_stall_per_thread"
              units="cycles"
-             symbol_name="DsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 13 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Active"
+             symbol_name="GsEuActive"
+             underscore_name="gs_eu_active"
+             description="The percentage of time in which geometry shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
     <counter name="GS AVG Active per Thread"
+             symbol_name="GsEuActivePerThread"
+             underscore_name="gs_eu_active_per_thread"
              description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 22 READ $GsThreads UDIV"
-             underscore_name="gs_eu_active_per_thread"
              units="cycles"
-             symbol_name="GsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Stall"
+             symbol_name="GsEuStall"
+             underscore_name="gs_eu_stall"
+             description="The percentage of time in which geometry shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiHiDepthMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache."
+    <counter name="GS AVG Stall per Thread"
+             symbol_name="GsEuStallPerThread"
+             underscore_name="gs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hi_depth_memory_reads"
-             units="messages"
-             symbol_name="GtiHiDepthMemoryReads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 23 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="TCS Threads Dispatched"
-             description="The total number of control shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 10 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="CS EU Active"
+             symbol_name="CsEuActive"
+             underscore_name="cs_eu_active"
+             description="The percentage of time in which compute shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+    <counter name="CS AVG Active per Thread"
+             symbol_name="CsEuActivePerThread"
+             underscore_name="cs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 8 READ $HsThreads UDIV"
-             underscore_name="hs_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="HsEuStallPerThread"
              semantic_type="event"
+             equation="A 17 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS EU Active"
-             description="The percentage of time in which control shaders were processed actively on the EUs."
+    <counter name="CS EU Stall"
+             symbol_name="CsEuStall"
+             underscore_name="cs_eu_stall"
+             description="The percentage of time in which compute shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_active"
              units="percent"
-             symbol_name="HsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="CS AVG Stall per Thread"
+             symbol_name="CsEuStallPerThread"
+             underscore_name="cs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 36 READ"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 18 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Late Depth Test Fails"
-             description="The total number of pixels dropped on post-FS depth test."
-             data_type="uint64"
-             equation="A 39 READ $SamplesKilledInPs USUB"
-             underscore_name="post_ps_depth_test_fails"
-             units="pixels"
-             symbol_name="PostPsDepthTestFails"
-             semantic_type="event"
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="TCS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 7 READ $HsThreads UDIV"
-             underscore_name="hs_eu_active_per_thread"
              units="cycles"
-             symbol_name="HsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 27 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
              description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
              units="percent"
-             symbol_name="PsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
-             units="messages"
-             symbol_name="GtiMscMemoryReads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 28 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 33 READ"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
+             equation="A 33 READ"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
-             units="percent"
-             symbol_name="VsEuActive"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 36 READ"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Active"
-             description="The percentage of time in which compute shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_active"
-             units="percent"
-             symbol_name="CsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+    <counter name="Alpha Test Fails"
+             symbol_name="AlphaTestFails"
+             underscore_name="alpha_test_fails"
+             description="The total number of pixels dropped on post-FS alpha test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 37 READ"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
+    <counter name="Late Stencil Test Fails"
+             symbol_name="PostPsStencilTestFails"
+             underscore_name="post_ps_stencil_test_fails"
+             description="The total number of pixels dropped on post-FS stencil test."
              data_type="uint64"
-             equation="A 18 READ $CsThreads UDIV"
-             underscore_name="cs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="CsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 38 READ"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="Late Depth Test Fails"
+             symbol_name="PostPsDepthTestFails"
+             underscore_name="post_ps_depth_test_fails"
+             description="The total number of pixels dropped on post-FS depth test."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
-             units="messages"
-             symbol_name="GtiVfMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 39 READ $SamplesKilledInPs USUB"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
-    <counter name="Late Stencil Test Fails"
-             description="The total number of pixels dropped on post-FS stencil test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 38 READ"
-             underscore_name="post_ps_stencil_test_fails"
              units="pixels"
-             symbol_name="PostPsStencilTestFails"
              semantic_type="event"
+             equation="A 40 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             max_equation="100"
-             equation="A 2 READ $VsThreads UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
              units="messages"
-             symbol_name="GtiRsMemoryReads"
              semantic_type="event"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiHiDepthMemoryReads"
+             symbol_name="GtiHiDepthMemoryReads"
+             underscore_name="gti_hi_depth_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="CS EU Stall"
-             description="The percentage of time in which compute shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_stall"
-             units="percent"
-             symbol_name="CsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache."
              data_type="uint64"
-             equation="A 40 READ"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 misses)."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="GTI"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads (64B each)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
              mdapi_group="GTI"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
-             units="percent"
-             symbol_name="VsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="LLC GPU Read Accesses"
+             symbol_name="LlcReadAccesses"
+             underscore_name="llc_read_accesses"
+             description="The total number of LLC cache lookups for reads done from the GPU."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="LLC"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Memory Writes Distribution Gen7.5"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="3358d639-9b5f-45ab-976d-9b08cbfc6240"
        chipset="HSW"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3358d639-9b5f-45ab-976d-9b08cbfc6240"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="TES EU Stall"
-             description="The percentage of time in which evaluation shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_stall"
-             units="percent"
-             symbol_name="DsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Alpha Test Fails"
-             description="The total number of pixels dropped on post-FS alpha test."
-             data_type="uint64"
-             equation="A 37 READ"
-             underscore_name="alpha_test_fails"
-             units="pixels"
-             symbol_name="AlphaTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="TES Threads Dispatched"
-             description="The total number of evaluation shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 15 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
-    <counter name="TES AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             max_equation="100"
-             equation="A 12 READ $DsThreads UDIV"
-             underscore_name="ds_eu_active_per_thread"
              units="cycles"
-             symbol_name="DsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+             equation="C 7 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 25 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GS EU Stall"
-             description="The percentage of time in which geometry shaders were stalled on the EUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has being processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_stall"
              units="percent"
-             symbol_name="GsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 5 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 30 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
-             data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 20 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="CS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
+    <counter name="TCS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of control shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="100"
-             equation="A 17 READ $CsThreads UDIV"
-             underscore_name="cs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="CsEuActivePerThread"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 10 READ"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="TES Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of evaluation shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 15 READ"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 23 READ $GsThreads UDIV"
-             underscore_name="gs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="GsEuStallPerThread"
+             units="threads"
              semantic_type="event"
+             equation="A 25 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 30 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
-             units="messages"
-             symbol_name="GtiHizMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 20 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
-             data_type="uint64"
-             equation="A 28 READ $PsThreads UDIV"
-             underscore_name="ps_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="PsEuStallPerThread"
-             semantic_type="event"
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
-             units="messages"
-             symbol_name="GtiRccMemoryWrites"
+             max_equation="100"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 2 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="TES EU Active"
-             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_active"
              units="percent"
-             symbol_name="DsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
              description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 3 READ $VsThreads UDIV"
-             underscore_name="vs_eu_stall_per_thread"
              units="cycles"
-             symbol_name="VsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="TCS EU Stall"
-             description="The percentage of time in which control shaders were stalled on the EUs."
+    <counter name="TCS EU Active"
+             symbol_name="HsEuActive"
+             underscore_name="hs_eu_active"
+             description="The percentage of time in which control shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_stall"
              units="percent"
-             symbol_name="HsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has being processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+    <counter name="TCS AVG Active per Thread"
+             symbol_name="HsEuActivePerThread"
+             underscore_name="hs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 27 READ $PsThreads UDIV"
-             underscore_name="ps_eu_active_per_thread"
              units="cycles"
-             symbol_name="PsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 7 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="TCS EU Stall"
+             symbol_name="HsEuStall"
+             underscore_name="hs_eu_stall"
+             description="The percentage of time in which control shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="LLC GPU Write Accesses"
-             description="The total number of LLC cache lookups for write done from the GPU (32B writes)."
-             data_type="uint64"
-             equation="C 6 READ 2 UMUL"
-             underscore_name="llc_wr_accesses"
-             units="messages"
-             symbol_name="LlcWrAccesses"
+    <counter name="TCS AVG Stall per Thread"
+             symbol_name="HsEuStallPerThread"
+             underscore_name="hs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             equation="A 8 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="LLC"
              />
-    <counter name="GS EU Active"
-             description="The percentage of time in which geometry shaders were processed actively on the EUs."
+    <counter name="TES EU Active"
+             symbol_name="DsEuActive"
+             underscore_name="ds_eu_active"
+             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_active"
              units="percent"
-             symbol_name="GsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+    <counter name="TES AVG Active per Thread"
+             symbol_name="DsEuActivePerThread"
+             underscore_name="ds_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+             data_type="uint64"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 12 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TES EU Stall"
+             symbol_name="DsEuStall"
+             underscore_name="ds_eu_stall"
+             description="The percentage of time in which evaluation shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
              units="percent"
-             symbol_name="PsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
-             data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
-             units="messages"
-             symbol_name="GtiStcMemoryWrites"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
     <counter name="TES AVG Stall per Thread"
+             symbol_name="DsEuStallPerThread"
+             underscore_name="ds_eu_stall_per_thread"
              description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 13 READ $DsThreads UDIV"
-             underscore_name="ds_eu_stall_per_thread"
              units="cycles"
-             symbol_name="DsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 13 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Active"
+             symbol_name="GsEuActive"
+             underscore_name="gs_eu_active"
+             description="The percentage of time in which geometry shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
     <counter name="GS AVG Active per Thread"
+             symbol_name="GsEuActivePerThread"
+             underscore_name="gs_eu_active_per_thread"
              description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 22 READ $GsThreads UDIV"
-             underscore_name="gs_eu_active_per_thread"
              units="cycles"
-             symbol_name="GsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Stall"
+             symbol_name="GsEuStall"
+             underscore_name="gs_eu_stall"
+             description="The percentage of time in which geometry shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS Threads Dispatched"
-             description="The total number of control shader hardware threads dispatched."
+    <counter name="GS AVG Stall per Thread"
+             symbol_name="GsEuStallPerThread"
+             underscore_name="gs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 10 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="A 23 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+    <counter name="CS EU Active"
+             symbol_name="CsEuActive"
+             underscore_name="cs_eu_active"
+             description="The percentage of time in which compute shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS AVG Active per Thread"
+             symbol_name="CsEuActivePerThread"
+             underscore_name="cs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 8 READ $HsThreads UDIV"
-             underscore_name="hs_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="HsEuStallPerThread"
              semantic_type="event"
+             equation="A 17 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS EU Active"
-             description="The percentage of time in which control shaders were processed actively on the EUs."
+    <counter name="CS EU Stall"
+             symbol_name="CsEuStall"
+             underscore_name="cs_eu_stall"
+             description="The percentage of time in which compute shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_active"
              units="percent"
-             symbol_name="HsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="CS AVG Stall per Thread"
+             symbol_name="CsEuStallPerThread"
+             underscore_name="cs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 36 READ"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 18 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Late Depth Test Fails"
-             description="The total number of pixels dropped on post-FS depth test."
-             data_type="uint64"
-             equation="A 39 READ $SamplesKilledInPs USUB"
-             underscore_name="post_ps_depth_test_fails"
-             units="pixels"
-             symbol_name="PostPsDepthTestFails"
-             semantic_type="event"
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="TCS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 7 READ $HsThreads UDIV"
-             underscore_name="hs_eu_active_per_thread"
              units="cycles"
-             symbol_name="HsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 27 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
              description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
              units="percent"
-             symbol_name="PsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 28 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 33 READ"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 33 READ"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
-             units="percent"
-             symbol_name="VsEuActive"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Active"
-             description="The percentage of time in which compute shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_active"
-             units="percent"
-             symbol_name="CsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 36 READ"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
+    <counter name="Alpha Test Fails"
+             symbol_name="AlphaTestFails"
+             underscore_name="alpha_test_fails"
+             description="The total number of pixels dropped on post-FS alpha test."
              data_type="uint64"
-             equation="A 18 READ $CsThreads UDIV"
-             underscore_name="cs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="CsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 37 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Late Stencil Test Fails"
+             symbol_name="PostPsStencilTestFails"
+             underscore_name="post_ps_stencil_test_fails"
              description="The total number of pixels dropped on post-FS stencil test."
              data_type="uint64"
-             equation="A 38 READ"
-             underscore_name="post_ps_stencil_test_fails"
              units="pixels"
-             symbol_name="PostPsStencilTestFails"
              semantic_type="event"
+             equation="A 38 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Late Depth Test Fails"
+             symbol_name="PostPsDepthTestFails"
+             underscore_name="post_ps_depth_test_fails"
+             description="The total number of pixels dropped on post-FS depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 39 READ $SamplesKilledInPs USUB"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 40 READ"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
              description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
              />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             max_equation="100"
-             equation="A 2 READ $VsThreads UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Stall"
-             description="The percentage of time in which compute shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_stall"
-             units="percent"
-             symbol_name="CsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="A 40 READ"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
              description="The total number of GTI memory writes (64B each)."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_memory_writes"
              units="messages"
-             symbol_name="GtiMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 5 READ"
+             mdapi_group="GTI"
              mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
-             units="percent"
-             symbol_name="VsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="LLC GPU Write Accesses"
+             symbol_name="LlcWrAccesses"
+             underscore_name="llc_wr_accesses"
+             description="The total number of LLC cache lookups for write done from the GPU (32B writes)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ 2 UMUL"
+             mdapi_group="LLC"
+             mdapi_usage_flags="Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set SamplerBalance"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_balance"
-       hw_config_guid="bc274488-b4b6-40c7-90da-b77d7ad16189"
        chipset="HSW"
        symbol_name="SamplerBalance"
+       underscore_name="sampler_balance"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="bc274488-b4b6-40c7-90da-b77d7ad16189"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TES EU Stall"
-             description="The percentage of time in which evaluation shaders were stalled on the EUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has being processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_stall"
              units="percent"
-             symbol_name="DsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler L2 cache misses (ss3)"
-             description="Number of sampler L2 cache misses (ss3)"
-             data_type="uint64"
-             equation="C 1 READ C 0 READ UADD"
-             underscore_name="sampler3_l2_cache_misses"
-             units="messages"
-             symbol_name="Sampler3L2CacheMisses"
-             availability="$SubsliceMask 0x8 AND"
-             semantic_type="event"
+             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="Sampler/Sampler Cache"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Alpha Test Fails"
-             description="The total number of pixels dropped on post-FS alpha test."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 37 READ"
-             underscore_name="alpha_test_fails"
-             units="pixels"
-             symbol_name="AlphaTestFails"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="TES Threads Dispatched"
-             description="The total number of evaluation shader hardware threads dispatched."
+    <counter name="TCS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of control shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 15 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 10 READ"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
-    <counter name="TES AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
+    <counter name="TES Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of evaluation shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="100"
-             equation="A 12 READ $DsThreads UDIV"
-             underscore_name="ds_eu_active_per_thread"
-             units="cycles"
-             symbol_name="DsEuActivePerThread"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 15 READ"
              mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="GS EU Stall"
-             description="The percentage of time in which geometry shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_stall"
-             units="percent"
-             symbol_name="GsEuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 25 READ"
              mdapi_group="EU Array/Geometry Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="CS EU Active"
-             description="The percentage of time in which compute shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_active"
-             units="percent"
-             symbol_name="CsEuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
-             units="percent"
-             symbol_name="VsEuActive"
-             semantic_type="duration"
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 30 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="TCS EU Active"
-             description="The percentage of time in which control shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_active"
-             units="percent"
-             symbol_name="HsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="TES EU Active"
-             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_eu_active"
-             units="percent"
-             symbol_name="DsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Evaluation Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS EU Active"
-             description="The percentage of time in which geometry shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_eu_active"
-             units="percent"
-             symbol_name="GsEuActive"
-             semantic_type="duration"
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 20 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
-             units="percent"
-             symbol_name="PsEuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS EU Stall"
-             description="The percentage of time in which compute shaders were stalled on the EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cs_eu_stall"
              units="percent"
-             symbol_name="CsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
+             equation="A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
              units="percent"
-             symbol_name="VsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+             data_type="uint64"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 2 READ $VsThreads UDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS EU Stall"
-             description="The percentage of time in which control shaders were stalled on the EUs."
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_eu_stall"
              units="percent"
-             symbol_name="HsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS EU Stall"
-             description="The percentage of time in which fragment shaders were stalled on the EUs."
+    <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 3 READ $VsThreads UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TCS EU Active"
+             symbol_name="HsEuActive"
+             underscore_name="hs_eu_active"
+             description="The percentage of time in which control shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
              units="percent"
-             symbol_name="PsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="TCS AVG Active per Thread"
+             symbol_name="HsEuActivePerThread"
+             underscore_name="hs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="100"
+             units="cycles"
+             semantic_type="event"
+             equation="A 7 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="CS Duration"
-             description="Total Compute Shader GPU duration."
-             data_type="uint64"
-             equation="A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="cs_duration"
-             units="us"
-             symbol_name="CsDuration"
+    <counter name="TCS EU Stall"
+             symbol_name="HsEuStall"
+             underscore_name="hs_eu_stall"
+             description="The percentage of time in which control shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="TCS AVG Stall per Thread"
+             symbol_name="HsEuStallPerThread"
+             underscore_name="hs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 30 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 8 READ $HsThreads UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="TES Duration"
-             description="Total Evaluation Shader GPU duration."
-             data_type="uint64"
-             equation="A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="ds_duration"
-             units="us"
-             symbol_name="DsDuration"
+    <counter name="TES EU Active"
+             symbol_name="DsEuActive"
+             underscore_name="ds_eu_active"
+             description="The percentage of time in which evaluation shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Evaluation Shader"
-             />
-    <counter name="GS Duration"
-             description="Total Geometry Shader GPU duration."
-             data_type="uint64"
-             equation="A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="gs_duration"
-             units="us"
-             symbol_name="GsDuration"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="TES AVG Active per Thread"
+             symbol_name="DsEuActivePerThread"
+             underscore_name="ds_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 20 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             max_equation="100"
+             units="cycles"
              semantic_type="event"
+             equation="A 12 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="CS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
-             data_type="uint64"
+    <counter name="TES EU Stall"
+             symbol_name="DsEuStall"
+             underscore_name="ds_eu_stall"
+             description="The percentage of time in which evaluation shaders were stalled on the EUs."
+             data_type="float"
              max_equation="100"
-             equation="A 17 READ $CsThreads UDIV"
-             underscore_name="cs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="CsEuActivePerThread"
-             semantic_type="event"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="EU Array/Compute Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler L2 cache misses"
-             description="Number of sampler L2 cache misses"
+    <counter name="TES AVG Stall per Thread"
+             symbol_name="DsEuStallPerThread"
+             underscore_name="ds_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
              data_type="uint64"
-             equation="C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD C 3 READ UADD C 2 READ UADD C 1 READ UADD C 0 READ UADD"
-             underscore_name="sampler_l2_cache_misses"
-             units="messages"
-             symbol_name="SamplerL2CacheMisses"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Overview Frame Batch Draw"
+             equation="A 13 READ $DsThreads UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS EU Active"
+             symbol_name="GsEuActive"
+             underscore_name="gs_eu_active"
+             description="The percentage of time in which geometry shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="GS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
+    <counter name="GS AVG Active per Thread"
+             symbol_name="GsEuActivePerThread"
+             underscore_name="gs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 23 READ $GsThreads UDIV"
-             underscore_name="gs_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="GsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Sampler L2 cache misses (ss2)"
-             description="Number of sampler L2 cache misses (ss2)"
-             data_type="uint64"
-             equation="C 3 READ C 2 READ UADD"
-             underscore_name="sampler2_l2_cache_misses"
-             units="messages"
-             symbol_name="Sampler2L2CacheMisses"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="Sampler/Sampler Cache"
+    <counter name="GS EU Stall"
+             symbol_name="GsEuStall"
+             underscore_name="gs_eu_stall"
+             description="The percentage of time in which geometry shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS AVG Stall per Thread"
+             symbol_name="GsEuStallPerThread"
+             underscore_name="gs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 23 READ $GsThreads UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Idle"
-             description="The percentage of time in which the Execution Units were idle."
+    <counter name="CS EU Active"
+             symbol_name="CsEuActive"
+             underscore_name="cs_eu_active"
+             description="The percentage of time in which compute shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="100 $EuActive $EuStall FADD FSUB"
-             underscore_name="eu_idle"
              units="percent"
-             symbol_name="EuIdle"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+    <counter name="CS AVG Active per Thread"
+             symbol_name="CsEuActivePerThread"
+             underscore_name="cs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 28 READ $PsThreads UDIV"
-             underscore_name="ps_eu_stall_per_thread"
+             max_equation="100"
              units="cycles"
-             symbol_name="PsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 17 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="VS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
+    <counter name="CS EU Stall"
+             symbol_name="CsEuStall"
+             underscore_name="cs_eu_stall"
+             description="The percentage of time in which compute shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS AVG Stall per Thread"
+             symbol_name="CsEuStallPerThread"
+             underscore_name="cs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 3 READ $VsThreads UDIV"
-             underscore_name="vs_eu_stall_per_thread"
              units="cycles"
-             symbol_name="VsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $CsThreads UDIV"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has being processing GPU commands."
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 41 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
              description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
              data_type="uint64"
              max_equation="100"
-             equation="A 27 READ $PsThreads UDIV"
-             underscore_name="ps_eu_active_per_thread"
              units="cycles"
-             symbol_name="PsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 27 READ $PsThreads UDIV"
              mdapi_group="EU Array/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="TCS Duration"
-             description="Total Control Shader GPU duration."
-             data_type="uint64"
-             equation="A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="hs_duration"
-             units="us"
-             symbol_name="HsDuration"
+    <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
+             description="The percentage of time in which fragment shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 OCL"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TES AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs."
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 13 READ $DsThreads UDIV"
-             underscore_name="ds_eu_stall_per_thread"
              units="cycles"
-             symbol_name="DsEuStallPerThread"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 28 READ $PsThreads UDIV"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Evaluation Shader"
              />
-    <counter name="GS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="100"
-             equation="A 22 READ $GsThreads UDIV"
-             underscore_name="gs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="GsEuActivePerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Geometry Shader"
+             equation="A 33 READ"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TCS Threads Dispatched"
-             description="The total number of control shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 10 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="TCS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were stalled on the EUs."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 8 READ $HsThreads UDIV"
-             underscore_name="hs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="HsEuStallPerThread"
+             units="pixels"
+             semantic_type="event"
+             equation="A 36 READ"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Alpha Test Fails"
+             symbol_name="AlphaTestFails"
+             underscore_name="alpha_test_fails"
+             description="The total number of pixels dropped on post-FS alpha test."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 37 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Control Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Late Stencil Test Fails"
+             symbol_name="PostPsStencilTestFails"
+             underscore_name="post_ps_stencil_test_fails"
+             description="The total number of pixels dropped on post-FS stencil test."
              data_type="uint64"
-             equation="A 36 READ"
-             underscore_name="samples_killed_in_ps"
              units="pixels"
-             symbol_name="SamplesKilledInPs"
              semantic_type="event"
+             equation="A 38 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Late Depth Test Fails"
+             symbol_name="PostPsDepthTestFails"
+             underscore_name="post_ps_depth_test_fails"
              description="The total number of pixels dropped on post-FS depth test."
              data_type="uint64"
-             equation="A 39 READ $SamplesKilledInPs USUB"
-             underscore_name="post_ps_depth_test_fails"
              units="pixels"
-             symbol_name="PostPsDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 39 READ $SamplesKilledInPs USUB"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="TCS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="100"
-             equation="A 7 READ $HsThreads UDIV"
-             underscore_name="hs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="HsEuActivePerThread"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Control Shader"
+             equation="A 40 READ"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Duration"
+             symbol_name="PsDuration"
+             underscore_name="ps_duration"
              description="Total Fragment Shader GPU duration."
              data_type="uint64"
-             equation="A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="ps_duration"
              units="us"
-             symbol_name="PsDuration"
              semantic_type="duration"
+             equation="A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL4"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Duration"
+             symbol_name="VsDuration"
+             underscore_name="vs_duration"
+             description="Total Vertex Shader GPU duration."
+             data_type="uint64"
+             units="us"
+             semantic_type="duration"
+             equation="A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Sampler L2 cache misses (ss1)"
-             description="Number of sampler L2 cache misses (ss1)"
+    <counter name="GS Duration"
+             symbol_name="GsDuration"
+             underscore_name="gs_duration"
+             description="Total Geometry Shader GPU duration."
              data_type="uint64"
-             equation="C 5 READ C 4 READ UADD"
-             underscore_name="sampler1_l2_cache_misses"
-             units="messages"
-             symbol_name="Sampler1L2CacheMisses"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="Sampler/Sampler Cache"
+             units="us"
+             semantic_type="duration"
+             equation="A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="TES Duration"
+             symbol_name="DsDuration"
+             underscore_name="ds_duration"
+             description="Total Evaluation Shader GPU duration."
              data_type="uint64"
-             equation="A 33 READ"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             units="us"
+             semantic_type="duration"
+             equation="A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Evaluation Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="CS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs."
+    <counter name="TCS Duration"
+             symbol_name="HsDuration"
+             underscore_name="hs_duration"
+             description="Total Control Shader GPU duration."
              data_type="uint64"
-             equation="A 18 READ $CsThreads UDIV"
-             underscore_name="cs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="CsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             units="us"
+             semantic_type="duration"
+             equation="A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
+             mdapi_group="EU Array/Control Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Duration"
+             symbol_name="CsDuration"
+             underscore_name="cs_duration"
+             description="Total Compute Shader GPU duration."
+             data_type="uint64"
+             units="us"
+             semantic_type="duration"
+             equation="A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
              mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Late Stencil Test Fails"
-             description="The total number of pixels dropped on post-FS stencil test."
+    <counter name="EU Idle"
+             symbol_name="EuIdle"
+             underscore_name="eu_idle"
+             description="The percentage of time in which the Execution Units were idle."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="100 $EuActive $EuStall FADD FSUB"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler L2 cache misses"
+             symbol_name="SamplerL2CacheMisses"
+             underscore_name="sampler_l2_cache_misses"
+             description="Number of sampler L2 cache misses"
              data_type="uint64"
-             equation="A 38 READ"
-             underscore_name="post_ps_stencil_test_fails"
-             units="pixels"
-             symbol_name="PostPsStencilTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD C 3 READ UADD C 2 READ UADD C 1 READ UADD C 0 READ UADD"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <counter name="Sampler L2 cache misses (ss0)"
+             symbol_name="Sampler0L2CacheMisses"
+             underscore_name="sampler0_l2_cache_misses"
              description="Number of sampler L2 cache misses (ss0)"
              data_type="uint64"
-             equation="C 7 READ C 6 READ UADD"
-             underscore_name="sampler0_l2_cache_misses"
              units="messages"
-             symbol_name="Sampler0L2CacheMisses"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 7 READ C 6 READ UADD"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
+    <counter name="Sampler L2 cache misses (ss1)"
+             symbol_name="Sampler1L2CacheMisses"
+             underscore_name="sampler1_l2_cache_misses"
+             description="Number of sampler L2 cache misses (ss1)"
              data_type="uint64"
-             max_equation="100"
-             equation="A 2 READ $VsThreads UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             equation="C 5 READ C 4 READ UADD"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS Duration"
-             description="Total Vertex Shader GPU duration."
+    <counter name="Sampler L2 cache misses (ss2)"
+             symbol_name="Sampler2L2CacheMisses"
+             underscore_name="sampler2_l2_cache_misses"
+             description="Number of sampler L2 cache misses (ss2)"
              data_type="uint64"
-             equation="A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV"
-             underscore_name="vs_duration"
-             units="us"
-             symbol_name="VsDuration"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
+             units="messages"
+             semantic_type="event"
+             equation="C 3 READ C 2 READ UADD"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Sampler L2 cache misses (ss3)"
+             symbol_name="Sampler3L2CacheMisses"
+             underscore_name="sampler3_l2_cache_misses"
+             description="Number of sampler L2 cache misses (ss3)"
              data_type="uint64"
-             equation="A 40 READ"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             equation="C 1 READ C 0 READ UADD"
+             availability="$SubsliceMask 0x8 AND"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
index d28c630..3953ead 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1572342629" merge_md5="">
   <set name="Render Metrics Basic Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="193093b2-da24-49bf-b407-442c213b71b4"
        chipset="ICL"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="193093b2-da24-49bf-b407-442c213b71b4"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Sampler00 Busy"
-             description="The percentage of time in which Slice0 Sampler0 has been processing EU requests."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_busy"
              units="percent"
-             symbol_name="Sampler00Busy"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samplers Busy"
-             description="The percentage of time in which samplers have been processing EU requests."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="$Sampler00Busy"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
-             availability="$SubsliceMask 9 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Sampler00 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Slice0 Sampler0 has been slowing down the pipe when processing EU requests."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler00_bottleneck"
              units="percent"
-             symbol_name="Sampler00Bottleneck"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 1 READ B 3 READ UADD B 4 READ UADD B 5 READ UADD UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Sampler00 Busy"
+             symbol_name="Sampler00Busy"
+             underscore_name="sampler00_busy"
+             description="The percentage of time in which Slice0 Sampler0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
+             description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="$Sampler00Busy"
+             availability="$SubsliceMask 9 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Sampler00 Bottleneck"
+             symbol_name="Sampler00Bottleneck"
+             underscore_name="sampler00_bottleneck"
+             description="The percentage of time in which Slice0 Sampler0 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler00Bottleneck"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             availability="$SubsliceMask 9 AND"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 0 READ B 2 READ UADD UMUL"
-             underscore_name="gti_write_throughput"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ C 5 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 1 READ B 3 READ UADD B 4 READ UADD B 5 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 0 READ B 2 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler00Bottleneck"
+             availability="$SubsliceMask 9 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Compute Metrics Basic Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="5c99b520-174d-4e94-a95e-5392e9b2a944"
        chipset="ICL"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5c99b520-174d-4e94-a95e-5392e9b2a944"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 3 READ C 2 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ C 4 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU AVG IPC Rate"
-             description="The average rate of IPC calculated for 2 FPU pipelines."
-             data_type="float"
-             max_equation="2"
-             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
-             units="number"
-             symbol_name="EuAvgIpcRate"
-             semantic_type="ratio"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+    <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
+             description="The average rate of IPC calculated for 2 FPU pipelines."
+             data_type="float"
+             max_equation="2"
+             units="number"
+             semantic_type="ratio"
+             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Typed Atomics Accesses"
-             description="The total number of typed atomic accesses via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 4 READ B 5 READ UADD 2 UDIV $EuSubslicesTotalCount UMUL"
-             underscore_name="typed_atomics"
-             units="events"
-             symbol_name="TypedAtomics"
-             semantic_type="throughput"
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="8 A 13 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 1 READ B 3 READ UADD B 6 READ UADD B 7 READ UADD UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 0 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="untyped_bytes_written"
-             units="bytes"
-             symbol_name="UntypedBytesWritten"
-             semantic_type="throughput"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Typed Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 7 READ C 6 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
-             underscore_name="typed_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="8 A 13 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
+             description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  B 0 READ B 2 READ UADD UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 7 READ C 6 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 5 READ C 4 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 3 READ C 2 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 1 READ C 0 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Typed Atomics Accesses"
+             symbol_name="TypedAtomics"
+             underscore_name="typed_atomics"
+             description="The total number of typed atomic accesses via Data Port."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="events"
+             semantic_type="throughput"
+             equation="B 4 READ B 5 READ UADD 2 UDIV $EuSubslicesTotalCount UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 1 READ B 3 READ UADD B 6 READ UADD B 7 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  B 0 READ B 2 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="ComputeExtended Gen11"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="43eb7fc1-dc5e-45e2-a90a-0053f5397271"
        chipset="ICL"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="43eb7fc1-dc5e-45e2-a90a-0053f5397271"
        >
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="B 7 READ B 1 READ FADD C 2 READ FADD C 3 READ FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="ratio"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Eu Typed Atomics 00"
-             description="Slice0 Dualsubslice 0 Eu Typed Atomics"
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_atomics00"
-             units="messages"
-             symbol_name="EuTypedAtomics00"
-             availability="$SubsliceMask 1 AND"
+             units="cycles"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 00"
-             description="Slice 0 Dualsubslice 0 typed atomics."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="typed_atomics00"
-             units="messages"
-             symbol_name="TypedAtomics00"
-             availability="$SubsliceMask 1 AND"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             equation="$EuTypedAtomics00 $TypedAtomics00 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="ratio"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Eu 64 Untyped Reads 00"
-             description="Slice0 Dualsubslice 0 Eu 64 Untyped Reads"
-             data_type="float"
-             equation="B 5 READ C 1 READ FADD B 6 READ FADD"
-             underscore_name="eu_a64_untyped_reads00"
-             units="messages"
-             symbol_name="EuA64UntypedReads00"
-             availability="$SubsliceMask 1 AND"
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Eu A32 Untyped Writes 00"
-             description="Slice0 Dualsubslice 0 Eu A32 Untyped Writes"
+    <counter name="Typed Atomics 00"
+             symbol_name="TypedAtomics00"
+             underscore_name="typed_atomics00"
+             description="Slice 0 Dualsubslice 0 typed atomics."
              data_type="uint64"
-             equation="B 7 READ B 1 READ UADD"
-             underscore_name="eu_a32_untyped_writes00"
              units="messages"
-             symbol_name="EuA32UntypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 7 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="Typed Reads 00"
+             symbol_name="TypedReads00"
+             underscore_name="typed_reads00"
+             description="Slice 0 Dualsubslice 0 typed reads."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="messages"
              semantic_type="event"
+             equation="C 6 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Typed Writes 00"
+             symbol_name="TypedWrites00"
+             underscore_name="typed_writes00"
+             description="Slice 0 Dualsubslice 0 typed writes."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Untyped Reads 00"
+             symbol_name="UntypedReads00"
+             underscore_name="untyped_reads00"
+             description="Slice 0 Dualsubslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="C 4 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Untyped Writes 00"
+             symbol_name="UntypedWrites00"
+             underscore_name="untyped_writes00"
+             description="Slice 0 Dualsubslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Eu Typed Reads 00"
+             symbol_name="EuTypedReads00"
+             underscore_name="eu_typed_reads00"
              description="Slice0 Dualsubslice 0 Eu Typed Reads"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_reads00"
              units="messages"
-             symbol_name="EuTypedReads00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="Eu Typed Writes 00"
+             symbol_name="EuTypedWrites00"
+             underscore_name="eu_typed_writes00"
              description="Slice0 Dualsubslice 0 Eu Typed Writes"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_typed_writes00"
              units="messages"
-             symbol_name="EuTypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Typed Writes 00"
-             description="Slice 0 Dualsubslice 0 typed writes."
+    <counter name="Eu Typed Atomics 00"
+             symbol_name="EuTypedAtomics00"
+             underscore_name="eu_typed_atomics00"
+             description="Slice0 Dualsubslice 0 Eu Typed Atomics"
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="typed_writes00"
              units="messages"
-             symbol_name="TypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites00 $TypedWrites00 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
+             equation="B 2 READ"
              availability="$SubsliceMask 1 AND"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Untyped Writes 00"
-             description="Slice 0 Dualsubslice 0 untyped writes (including SLM writes)."
+    <counter name="Eu A32 Untyped Reads 00"
+             symbol_name="EuA32UntypedReads00"
+             underscore_name="eu_a32_untyped_reads00"
+             description="Slice0 Dualsubslice 0 Eu A32 Untyped Reads"
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_writes00"
              units="messages"
-             symbol_name="UntypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 00"
-             description="Slice 0 Dualsubslice 0 untyped reads (including SLM reads)."
+    <counter name="Eu A32 Untyped Writes 00"
+             symbol_name="EuA32UntypedWrites00"
+             underscore_name="eu_a32_untyped_writes00"
+             description="Slice0 Dualsubslice 0 Eu A32 Untyped Writes"
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="untyped_reads00"
              units="messages"
-             symbol_name="UntypedReads00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ B 1 READ UADD"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Eu 64 Untyped Reads 00"
+             symbol_name="EuA64UntypedReads00"
+             underscore_name="eu_a64_untyped_reads00"
+             description="Slice0 Dualsubslice 0 Eu 64 Untyped Reads"
+             data_type="float"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ C 1 READ FADD B 6 READ FADD"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="Eu A64 Untyped Writes 00"
+             symbol_name="EuA64UntypedWrites00"
+             underscore_name="eu_a64_untyped_writes00"
              description="Slice0 Dualsubslice 0 Eu A64 Untyped Writes"
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="eu_a64_untyped_writes00"
              units="messages"
-             symbol_name="EuA64UntypedWrites00"
-             availability="$SubsliceMask 1 AND"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedReadsPerCacheLine"
-             description="The ratio of EU untyped read requests to L3 cache line reads."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             equation="B 0 READ B 5 READ C 1 READ FADD B 6 READ FADD FADD C 4 READ FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 00"
-             description="Slice 0 Dualsubslice 0 typed reads."
-             data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="typed_reads00"
-             units="messages"
-             symbol_name="TypedReads00"
+             equation="$EuTypedAtomics00 $TypedAtomics00 FDIV"
              availability="$SubsliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
              description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuTypedReads00 $TypedReads00 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
-             availability="$SubsliceMask 1 AND"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuTypedReads00 $TypedReads00 FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Eu A32 Untyped Reads 00"
-             description="Slice0 Dualsubslice 0 Eu A32 Untyped Reads"
-             data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_a32_untyped_reads00"
-             units="messages"
-             symbol_name="EuA32UntypedReads00"
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites00 $TypedWrites00 FDIV"
              availability="$SubsliceMask 1 AND"
-             semantic_type="event"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
+             description="The ratio of EU untyped read requests to L3 cache line reads."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="B 0 READ B 5 READ C 1 READ FADD B 6 READ FADD FADD C 4 READ FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="B 7 READ B 1 READ FADD C 2 READ FADD C 3 READ FDIV"
+             availability="$SubsliceMask 1 AND"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="05d5e01f-0800-4975-b36b-7b169cad3fab"
        chipset="ICL"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="05d5e01f-0800-4975-b36b-7b169cad3fab"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank3."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank0."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 7 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank1."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 6 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
-             units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank2."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 5 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
-             units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank4 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank4."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="l3_bank04_accesses"
-             units="messages"
-             symbol_name="L3Bank04Accesses"
-             availability="$SliceMask 1 AND"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="Slice0 L3 Bank5 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank5."
-             data_type="uint64"
-             equation="C 2 READ 2 UMUL"
-             underscore_name="l3_bank05_accesses"
-             units="messages"
-             symbol_name="L3Bank05Accesses"
-             availability="$SliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="Slice0 L3 Bank6 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank6."
-             data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank06_accesses"
-             units="messages"
-             symbol_name="L3Bank06Accesses"
-             availability="$SliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="Slice0 L3 Bank7 Accesses"
-             description="The total number of accesses to Slice0 L3 Bank7."
-             data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank07_accesses"
-             units="messages"
-             symbol_name="L3Bank07Accesses"
-             availability="$SliceMask 1 AND"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="$L3Bank00Accesses $L3Bank01Accesses UADD $L3Bank02Accesses UADD $L3Bank03Accesses UADD $L3Bank04Accesses UADD $L3Bank05Accesses UADD $L3Bank06Accesses UADD $L3Bank07Accesses UADD"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="64  B 7 READ B 6 READ UADD 8 UMUL UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
              description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
              data_type="float"
              max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
              units="percent"
-             symbol_name="EuMoveFpu0Instruction"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
-             units="messages"
-             symbol_name="SamplerAccesses"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="$SamplerAccesses 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
+             equation="$SamplerAccesses 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="B 7 READ B 6 READ UADD 8 UMUL A 32 READ UADD"
-             underscore_name="l3_lookups"
-             units="messages"
-             symbol_name="L3Lookups"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 2 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Slice0 L3 Bank0 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to Slice0 L3 Bank0."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
+             equation="C 7 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Slice0 L3 Bank1 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to Slice0 L3 Bank1."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="C 6 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Slice0 L3 Bank2 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
+             description="The total number of accesses to Slice0 L3 Bank2."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Slice0 L3 Bank3 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to Slice0 L3 Bank3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Slice0 L3 Bank4 Accesses"
+             symbol_name="L3Bank04Accesses"
+             underscore_name="l3_bank04_accesses"
+             description="The total number of accesses to Slice0 L3 Bank4."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="C 3 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Slice0 L3 Bank5 Accesses"
+             symbol_name="L3Bank05Accesses"
+             underscore_name="l3_bank05_accesses"
+             description="The total number of accesses to Slice0 L3 Bank5."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="C 2 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Slice0 L3 Bank6 Accesses"
+             symbol_name="L3Bank06Accesses"
+             underscore_name="l3_bank06_accesses"
+             description="The total number of accesses to Slice0 L3 Bank6."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Slice0 L3 Bank7 Accesses"
+             symbol_name="L3Bank07Accesses"
+             underscore_name="l3_bank07_accesses"
+             description="The total number of accesses to Slice0 L3 Bank7."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 1 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="$L3Bank00Accesses $L3Bank01Accesses UADD $L3Bank02Accesses UADD $L3Bank03Accesses UADD $L3Bank04Accesses UADD $L3Bank05Accesses UADD $L3Bank06Accesses UADD $L3Bank07Accesses UADD"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="messages"
+             semantic_type="event"
+             equation="B 7 READ B 6 READ UADD 8 UMUL A 32 READ UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 3 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="64  B 7 READ B 6 READ UADD 8 UMUL UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="$ShaderMemoryAccesses 64 UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 2 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen11"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="03c7a167-2abc-4ba6-878a-f1d80082abca"
        chipset="ICL"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="03c7a167-2abc-4ba6-878a-f1d80082abca"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="15"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="hi_depth_bottleneck"
              units="percent"
-             symbol_name="HiDepthBottleneck"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="bc_bottleneck"
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Strip-Fans Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="10"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="sf_bottleneck"
-             units="percent"
-             symbol_name="SfBottleneck"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
-             units="percent"
-             symbol_name="SfStall"
-             semantic_type="duration"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Clipper"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SO Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             high_watermark="15"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="so_bottleneck"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="SO Bottleneck"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
+             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Clipper Bottleneck"
-             low_watermark="10"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
              description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Early Depth Bottleneck"
-             low_watermark="10"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
              description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull shader pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
+             description="The percentage of time in which stream-output pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SO Stall"
-             description="The percentage of time in which stream-output pipeline stage was stalled."
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="c5cbc488-6569-41dd-9128-42bf6f0d317c"
        chipset="ICL"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="c5cbc488-6569-41dd-9128-42bf6f0d317c"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
              units="percent"
-             symbol_name="GTRequestQueueFull"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2)"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="$ShaderMemoryAccesses 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0)"
              data_type="float"
              max_equation="100"
-             equation="C 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader03AccessStalledOnL3"
+             underscore_name="non_sampler_shader03_access_stalled_on_l3"
              description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3)"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader03_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader03AccessStalledOnL3"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 6 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="0316ce4f-e03f-4738-8262-13528fce8eea"
        chipset="ICL"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="0316ce4f-e03f-4738-8262-13528fce8eea"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
              units="percent"
-             symbol_name="GTRequestQueueFull"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Pipe0 PS Output Available"
-             description="The percentage of time in which slice0 pipe0 PS output is available"
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output00_available"
-             units="percent"
-             symbol_name="PSOutput00Available"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 5 READ"
              mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Slice0 Pipe1 PS Output Available"
-             description="The percentage of time in which slice0 pipe1 PS output is available"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output01_available"
-             units="percent"
-             symbol_name="PSOutput01Available"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pipe1 Pixel Values Ready"
-             description="The percentage of time in which slice0 pipe1 pixel values are ready"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values01_ready"
              units="percent"
-             symbol_name="PixelValues01Ready"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Pipe0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data00_ready"
              units="percent"
-             symbol_name="PixelData00Ready"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Slice0 Pipe1 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data01_ready"
              units="percent"
-             symbol_name="PixelData01Ready"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 21 READ 4 UMUL"
              mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="Slice0 Pipe0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pipe0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values00_ready"
-             units="percent"
-             symbol_name="PixelValues00Ready"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pipe0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData00Ready"
+             underscore_name="pixel_data00_ready"
+             description="The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pipe1 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData01Ready"
+             underscore_name="pixel_data01_ready"
+             description="The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pipe0 PS Output Available"
+             symbol_name="PSOutput00Available"
+             underscore_name="ps_output00_available"
+             description="The percentage of time in which slice0 pipe0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pipe1 PS Output Available"
+             symbol_name="PSOutput01Available"
+             underscore_name="ps_output01_available"
+             description="The percentage of time in which slice0 pipe1 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pipe0 Pixel Values Ready"
+             symbol_name="PixelValues00Ready"
+             underscore_name="pixel_values00_ready"
+             description="The percentage of time in which slice0 pipe0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pipe1 Pixel Values Ready"
+             symbol_name="PixelValues01Ready"
+             underscore_name="pixel_values01_ready"
+             description="The percentage of time in which slice0 pipe1 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
         <register type="NOA" address="0x00009840" value="0x00000000" />
         <register type="NOA" address="0x00009884" value="0x00000000" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="21d69ec3-91e1-48a8-acd6-c0c4ec6e819a"
        chipset="ICL"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="21d69ec3-91e1-48a8-acd6-c0c4ec6e819a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
              units="percent"
-             symbol_name="GTRequestQueueFull"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice0 L3 Bank5 Active"
-             description="The percentage of time in which slice0 L3 bank5 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank5_active"
-             units="percent"
-             symbol_name="L30Bank5Active"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank6 Active"
-             description="The percentage of time in which slice0 L3 bank6 is active"
+    <counter name="Slice0 L3 Bank4 Active"
+             symbol_name="L30Bank4Active"
+             underscore_name="l30_bank4_active"
+             description="The percentage of time in which slice0 L3 bank4 is active"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank6_active"
              units="percent"
-             symbol_name="L30Bank6Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="Slice0 L3 Bank5 Active"
+             symbol_name="L30Bank5Active"
+             underscore_name="l30_bank5_active"
+             description="The percentage of time in which slice0 L3 bank5 is active"
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
-             units="percent"
-             symbol_name="L30Bank0Active"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 L3 Bank6 Active"
+             symbol_name="L30Bank6Active"
+             underscore_name="l30_bank6_active"
+             description="The percentage of time in which slice0 L3 bank6 is active"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Slice0 L3 Bank7 Active"
+             symbol_name="L30Bank7Active"
+             underscore_name="l30_bank7_active"
              description="The percentage of time in which slice0 L3 bank7 is active"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank7_active"
              units="percent"
-             symbol_name="L30Bank7Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank4 Active"
-             description="The percentage of time in which slice0 L3 bank4 is active"
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank4_active"
              units="percent"
-             symbol_name="L30Bank4Active"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank0 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="e60e9155-6830-4aec-baf2-1c3c15a73869"
        chipset="ICL"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="e60e9155-6830-4aec-baf2-1c3c15a73869"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
              units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank1 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="47c364d5-1799-4d17-9447-add9358c6451"
        chipset="ICL"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="47c364d5-1799-4d17-9447-add9358c6451"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
         <register type="NOA" address="0x00009840" value="0x00000000" />
   </set>
 
   <set name="Gen11 L2Bank4 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_4"
-       hw_config_guid="e5ab5c08-3130-4469-8eaf-b23d3dc817d4"
        chipset="ICL"
        symbol_name="L3_4"
+       underscore_name="l3_4"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="e5ab5c08-3130-4469-8eaf-b23d3dc817d4"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 L3 Bank4 Stalled"
-             description="The percentage of time in which slice0 L3 bank4 is stalled"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank4_stalled"
              units="percent"
-             symbol_name="L30Bank4Stalled"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank4 Stalled"
+             symbol_name="L30Bank4Stalled"
+             underscore_name="l30_bank4_stalled"
+             description="The percentage of time in which slice0 L3 bank4 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen11 L2Bank5 stalled metric set"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_5"
-       hw_config_guid="6cdf23c1-f725-414c-959a-c90fa5571b1f"
        chipset="ICL"
        symbol_name="L3_5"
+       underscore_name="l3_5"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="6cdf23c1-f725-414c-959a-c90fa5571b1f"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 L3 Bank5 Stalled"
-             description="The percentage of time in which slice0 L3 bank5 is stalled"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ B 3 READ FADD B 0 READ FADD B 1 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank5_stalled"
              units="percent"
-             symbol_name="L30Bank5Stalled"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank5 Stalled"
+             symbol_name="L30Bank5Stalled"
+             underscore_name="l30_bank5_stalled"
+             description="The percentage of time in which slice0 L3 bank5 is stalled"
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 2 READ B 3 READ FADD B 0 READ FADD B 1 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set Sampler 1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_1"
-       hw_config_guid="51a2eb6d-9fad-4489-8f22-ab845fe7882a"
        chipset="ICL"
        symbol_name="Sampler_1"
+       underscore_name="sampler_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="51a2eb6d-9fad-4489-8f22-ab845fe7882a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
              units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice0 Subslice6 Input Available"
-             description="The percentage of time in which slice0 subslice6 sampler input is available"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler06_input_available"
              units="percent"
-             symbol_name="Sampler06InputAvailable"
-             availability="$SubsliceMask 64 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice4 Input Available"
-             description="The percentage of time in which slice0 subslice4 sampler input is available"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler04_input_available"
              units="percent"
-             symbol_name="Sampler04InputAvailable"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice0 Subslice3 Input Available"
-             description="The percentage of time in which slice0 subslice3 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler03_input_available"
-             units="percent"
-             symbol_name="Sampler03InputAvailable"
-             availability="$SubsliceMask 8 AND"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice5 Input Available"
-             description="The percentage of time in which slice0 subslice5 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler05_input_available"
-             units="percent"
-             symbol_name="Sampler05InputAvailable"
-             availability="$SubsliceMask 32 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Slice0 Subslice7 Input Available"
-             description="The percentage of time in which slice0 subslice7 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler07_input_available"
-             units="percent"
-             symbol_name="Sampler07InputAvailable"
-             availability="$SubsliceMask 128 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <register_config type="NOA">
-        <register type="NOA" address="0x00000D04" value="0x00000200" />
-        <register type="NOA" address="0x00009840" value="0x00000000" />
-        <register type="NOA" address="0x00009884" value="0x00000000" />
-        <register type="NOA" address="0x00009888" value="0x142A0165" />
-        <register type="NOA" address="0x00009888" value="0x142F0165" />
-        <register type="NOA" address="0x00009888" value="0x146A0165" />
-        <register type="NOA" address="0x00009888" value="0x146F0165" />
-        <register type="NOA" address="0x00009888" value="0x14AA0165" />
-        <register type="NOA" address="0x00009888" value="0x14AF0165" />
-        <register type="NOA" address="0x00009888" value="0x14EA0165" />
-        <register type="NOA" address="0x00009888" value="0x14EF0165" />
-        <register type="NOA" address="0x00009888" value="0x161E8000" />
-        <register type="NOA" address="0x00009888" value="0x181EC000" />
-        <register type="NOA" address="0x00009888" value="0x1A1E0400" />
-        <register type="NOA" address="0x00009888" value="0x061F8000" />
-        <register type="NOA" address="0x00009888" value="0x081F8000" />
-        <register type="NOA" address="0x00009888" value="0x0A1F8000" />
-        <register type="NOA" address="0x00009888" value="0x0C1F0400" />
-        <register type="NOA" address="0x00009888" value="0x06204000" />
-        <register type="NOA" address="0x00009888" value="0x08204000" />
-        <register type="NOA" address="0x00009888" value="0x0A204000" />
-        <register type="NOA" address="0x00009888" value="0x0C204000" />
-        <register type="NOA" address="0x00009888" value="0x06218000" />
-        <register type="NOA" address="0x00009888" value="0x08218000" />
-        <register type="NOA" address="0x00009888" value="0x0A218000" />
-        <register type="NOA" address="0x00009888" value="0x0C218000" />
-        <register type="NOA" address="0x00009888" value="0x042A1800" />
-        <register type="NOA" address="0x00009888" value="0x062AC038" />
-        <register type="NOA" address="0x00009888" value="0x102A0000" />
-        <register type="NOA" address="0x00009888" value="0x082AC000" />
-        <register type="NOA" address="0x00009888" value="0x0A2A0000" />
-        <register type="NOA" address="0x00009888" value="0x0C2A0000" />
-        <register type="NOA" address="0x00009888" value="0x0A2C8000" />
-        <register type="NOA" address="0x00009888" value="0x0C2C8000" />
-        <register type="NOA" address="0x00009888" value="0x0E2C8000" />
-        <register type="NOA" address="0x00009888" value="0x182C0002" />
-        <register type="NOA" address="0x00009888" value="0x022F2000" />
-        <register type="NOA" address="0x00009888" value="0x042F0048" />
-        <register type="NOA" address="0x00009888" value="0x102F0000" />
-        <register type="NOA" address="0x00009888" value="0x1C5E000C" />
-        <register type="NOA" address="0x00009888" value="0x125E8000" />
-        <register type="NOA" address="0x00009888" value="0x145E8000" />
-        <register type="NOA" address="0x00009888" value="0x0E5F000C" />
-        <register type="NOA" address="0x00009888" value="0x025F8000" />
-        <register type="NOA" address="0x00009888" value="0x045F8000" />
-        <register type="NOA" address="0x00009888" value="0x1C600014" />
-        <register type="NOA" address="0x00009888" value="0x02604000" />
-        <register type="NOA" address="0x00009888" value="0x04604000" />
-        <register type="NOA" address="0x00009888" value="0x16612800" />
-        <register type="NOA" address="0x00009888" value="0x02618000" />
-        <register type="NOA" address="0x00009888" value="0x04618000" />
-        <register type="NOA" address="0x00009888" value="0x006A1800" />
-        <register type="NOA" address="0x00009888" value="0x026A0038" />
-        <register type="NOA" address="0x00009888" value="0x106A0000" />
-        <register type="NOA" address="0x00009888" value="0x1A6A00F0" />
-        <register type="NOA" address="0x00009888" value="0x046A0000" />
-        <register type="NOA" address="0x00009888" value="0x1A6C000A" />
-        <register type="NOA" address="0x00009888" value="0x066C8000" />
-        <register type="NOA" address="0x00009888" value="0x086C8000" />
-        <register type="NOA" address="0x00009888" value="0x0E6F2440" />
-        <register type="NOA" address="0x00009888" value="0x106F0000" />
-        <register type="NOA" address="0x00009888" value="0x1A9EC000" />
-        <register type="NOA" address="0x00009888" value="0x1C9E0003" />
-        <register type="NOA" address="0x00009888" value="0x0C9FC000" />
-        <register type="NOA" address="0x00009888" value="0x0E9F0003" />
-        <register type="NOA" address="0x00009888" value="0x1AA02A00" />
-        <register type="NOA" address="0x00009888" value="0x1CA00001" />
-        <register type="NOA" address="0x00009888" value="0x16A102A8" />
-        <register type="NOA" address="0x00009888" value="0x0CAA1C30" />
-        <register type="NOA" address="0x00009888" value="0x10AA0000" />
-        <register type="NOA" address="0x00009888" value="0x18AAF000" />
-        <register type="NOA" address="0x00009888" value="0x1AAA0000" />
-        <register type="NOA" address="0x00009888" value="0x18ACAA00" />
-        <register type="NOA" address="0x00009888" value="0x0AAF2440" />
-        <register type="NOA" address="0x00009888" value="0x10AF0000" />
-        <register type="NOA" address="0x00009888" value="0x10DE8000" />
-        <register type="NOA" address="0x00009888" value="0x1ADE3800" />
-        <register type="NOA" address="0x00009888" value="0x00DF8000" />
-        <register type="NOA" address="0x00009888" value="0x0CDF3800" />
-        <register type="NOA" address="0x00009888" value="0x00E04000" />
-        <register type="NOA" address="0x00009888" value="0x0EE04000" />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice3 Input Available"
+             symbol_name="Sampler03InputAvailable"
+             underscore_name="sampler03_input_available"
+             description="The percentage of time in which slice0 subslice3 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice4 Input Available"
+             symbol_name="Sampler04InputAvailable"
+             underscore_name="sampler04_input_available"
+             description="The percentage of time in which slice0 subslice4 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice5 Input Available"
+             symbol_name="Sampler05InputAvailable"
+             underscore_name="sampler05_input_available"
+             description="The percentage of time in which slice0 subslice5 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice6 Input Available"
+             symbol_name="Sampler06InputAvailable"
+             underscore_name="sampler06_input_available"
+             description="The percentage of time in which slice0 subslice6 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice7 Input Available"
+             symbol_name="Sampler07InputAvailable"
+             underscore_name="sampler07_input_available"
+             description="The percentage of time in which slice0 subslice7 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <register_config type="NOA">
+        <register type="NOA" address="0x00000D04" value="0x00000200" />
+        <register type="NOA" address="0x00009840" value="0x00000000" />
+        <register type="NOA" address="0x00009884" value="0x00000000" />
+        <register type="NOA" address="0x00009888" value="0x142A0165" />
+        <register type="NOA" address="0x00009888" value="0x142F0165" />
+        <register type="NOA" address="0x00009888" value="0x146A0165" />
+        <register type="NOA" address="0x00009888" value="0x146F0165" />
+        <register type="NOA" address="0x00009888" value="0x14AA0165" />
+        <register type="NOA" address="0x00009888" value="0x14AF0165" />
+        <register type="NOA" address="0x00009888" value="0x14EA0165" />
+        <register type="NOA" address="0x00009888" value="0x14EF0165" />
+        <register type="NOA" address="0x00009888" value="0x161E8000" />
+        <register type="NOA" address="0x00009888" value="0x181EC000" />
+        <register type="NOA" address="0x00009888" value="0x1A1E0400" />
+        <register type="NOA" address="0x00009888" value="0x061F8000" />
+        <register type="NOA" address="0x00009888" value="0x081F8000" />
+        <register type="NOA" address="0x00009888" value="0x0A1F8000" />
+        <register type="NOA" address="0x00009888" value="0x0C1F0400" />
+        <register type="NOA" address="0x00009888" value="0x06204000" />
+        <register type="NOA" address="0x00009888" value="0x08204000" />
+        <register type="NOA" address="0x00009888" value="0x0A204000" />
+        <register type="NOA" address="0x00009888" value="0x0C204000" />
+        <register type="NOA" address="0x00009888" value="0x06218000" />
+        <register type="NOA" address="0x00009888" value="0x08218000" />
+        <register type="NOA" address="0x00009888" value="0x0A218000" />
+        <register type="NOA" address="0x00009888" value="0x0C218000" />
+        <register type="NOA" address="0x00009888" value="0x042A1800" />
+        <register type="NOA" address="0x00009888" value="0x062AC038" />
+        <register type="NOA" address="0x00009888" value="0x102A0000" />
+        <register type="NOA" address="0x00009888" value="0x082AC000" />
+        <register type="NOA" address="0x00009888" value="0x0A2A0000" />
+        <register type="NOA" address="0x00009888" value="0x0C2A0000" />
+        <register type="NOA" address="0x00009888" value="0x0A2C8000" />
+        <register type="NOA" address="0x00009888" value="0x0C2C8000" />
+        <register type="NOA" address="0x00009888" value="0x0E2C8000" />
+        <register type="NOA" address="0x00009888" value="0x182C0002" />
+        <register type="NOA" address="0x00009888" value="0x022F2000" />
+        <register type="NOA" address="0x00009888" value="0x042F0048" />
+        <register type="NOA" address="0x00009888" value="0x102F0000" />
+        <register type="NOA" address="0x00009888" value="0x1C5E000C" />
+        <register type="NOA" address="0x00009888" value="0x125E8000" />
+        <register type="NOA" address="0x00009888" value="0x145E8000" />
+        <register type="NOA" address="0x00009888" value="0x0E5F000C" />
+        <register type="NOA" address="0x00009888" value="0x025F8000" />
+        <register type="NOA" address="0x00009888" value="0x045F8000" />
+        <register type="NOA" address="0x00009888" value="0x1C600014" />
+        <register type="NOA" address="0x00009888" value="0x02604000" />
+        <register type="NOA" address="0x00009888" value="0x04604000" />
+        <register type="NOA" address="0x00009888" value="0x16612800" />
+        <register type="NOA" address="0x00009888" value="0x02618000" />
+        <register type="NOA" address="0x00009888" value="0x04618000" />
+        <register type="NOA" address="0x00009888" value="0x006A1800" />
+        <register type="NOA" address="0x00009888" value="0x026A0038" />
+        <register type="NOA" address="0x00009888" value="0x106A0000" />
+        <register type="NOA" address="0x00009888" value="0x1A6A00F0" />
+        <register type="NOA" address="0x00009888" value="0x046A0000" />
+        <register type="NOA" address="0x00009888" value="0x1A6C000A" />
+        <register type="NOA" address="0x00009888" value="0x066C8000" />
+        <register type="NOA" address="0x00009888" value="0x086C8000" />
+        <register type="NOA" address="0x00009888" value="0x0E6F2440" />
+        <register type="NOA" address="0x00009888" value="0x106F0000" />
+        <register type="NOA" address="0x00009888" value="0x1A9EC000" />
+        <register type="NOA" address="0x00009888" value="0x1C9E0003" />
+        <register type="NOA" address="0x00009888" value="0x0C9FC000" />
+        <register type="NOA" address="0x00009888" value="0x0E9F0003" />
+        <register type="NOA" address="0x00009888" value="0x1AA02A00" />
+        <register type="NOA" address="0x00009888" value="0x1CA00001" />
+        <register type="NOA" address="0x00009888" value="0x16A102A8" />
+        <register type="NOA" address="0x00009888" value="0x0CAA1C30" />
+        <register type="NOA" address="0x00009888" value="0x10AA0000" />
+        <register type="NOA" address="0x00009888" value="0x18AAF000" />
+        <register type="NOA" address="0x00009888" value="0x1AAA0000" />
+        <register type="NOA" address="0x00009888" value="0x18ACAA00" />
+        <register type="NOA" address="0x00009888" value="0x0AAF2440" />
+        <register type="NOA" address="0x00009888" value="0x10AF0000" />
+        <register type="NOA" address="0x00009888" value="0x10DE8000" />
+        <register type="NOA" address="0x00009888" value="0x1ADE3800" />
+        <register type="NOA" address="0x00009888" value="0x00DF8000" />
+        <register type="NOA" address="0x00009888" value="0x0CDF3800" />
+        <register type="NOA" address="0x00009888" value="0x00E04000" />
+        <register type="NOA" address="0x00009888" value="0x0EE04000" />
         <register type="NOA" address="0x00009888" value="0x1AE000A0" />
         <register type="NOA" address="0x00009888" value="0x00E18000" />
         <register type="NOA" address="0x00009888" value="0x0EE18000" />
   </set>
 
   <set name="Metric set Sampler 2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler_2"
-       hw_config_guid="afc0f021-8c33-4d60-803d-93487f96c7c1"
        chipset="ICL"
        symbol_name="Sampler_2"
+       underscore_name="sampler_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="afc0f021-8c33-4d60-803d-93487f96c7c1"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
-             units="percent"
-             symbol_name="GTRequestQueueFull"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
              units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Subslice6 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice6 sampler output is ready"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler06_output_ready"
              units="percent"
-             symbol_name="Sampler06OutputReady"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice4 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice4 sampler output is ready"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler04_output_ready"
              units="percent"
-             symbol_name="Sampler04OutputReady"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Slice0 Subslice3 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice3 sampler output is ready"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler03_output_ready"
              units="percent"
-             symbol_name="Sampler03OutputReady"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice7 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice7 sampler output is ready"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler07_output_ready"
              units="percent"
-             symbol_name="Sampler07OutputReady"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Slice0 Subslice5 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice5 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler05_output_ready"
-             units="percent"
-             symbol_name="Sampler05OutputReady"
-             availability="$SubsliceMask 32 AND"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 2 AND"
-             semantic_type="duration"
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="$ShaderMemoryAccesses 64 UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice3 Sampler Output Ready"
+             symbol_name="Sampler03OutputReady"
+             underscore_name="sampler03_output_ready"
+             description="The percentage of time in which slice0 subslice3 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice4 Sampler Output Ready"
+             symbol_name="Sampler04OutputReady"
+             underscore_name="sampler04_output_ready"
+             description="The percentage of time in which slice0 subslice4 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice5 Sampler Output Ready"
+             symbol_name="Sampler05OutputReady"
+             underscore_name="sampler05_output_ready"
+             description="The percentage of time in which slice0 subslice5 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice6 Sampler Output Ready"
+             symbol_name="Sampler06OutputReady"
+             underscore_name="sampler06_output_ready"
+             description="The percentage of time in which slice0 subslice6 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice7 Sampler Output Ready"
+             symbol_name="Sampler07OutputReady"
+             underscore_name="sampler07_output_ready"
+             description="The percentage of time in which slice0 subslice7 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="eddc2f32-b196-4a72-9bf8-21770e35f8bd"
        chipset="ICL"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="eddc2f32-b196-4a72-9bf8-21770e35f8bd"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
+             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="A 13 READ A 1 READ UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
+             description="The percentage of time in which vertex shaders were stalled on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="cycles"
              semantic_type="event"
+             equation="A 14 READ A 1 READ UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="cycles"
              semantic_type="event"
+             equation="A 19 READ A 6 READ UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
+    <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
+             description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread03_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread03ReadyForDispatch"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="cycles"
              semantic_type="event"
+             equation="A 20 READ A 6 READ UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 20 READ A 6 READ UDIV"
-             underscore_name="ps_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="PsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 14 READ A 1 READ UDIV"
-             underscore_name="vs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="VsEuStallPerThread"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 19 READ A 6 READ UDIV"
-             underscore_name="ps_eu_active_per_thread"
-             units="cycles"
-             symbol_name="PsEuActivePerThread"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
-             units="percent"
-             symbol_name="PsEuActive"
-             semantic_type="duration"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 34 READ"
              mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread07_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread07ReadyForDispatch"
-             availability="$SubsliceMask 128 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="FS EU Stall"
-             description="The percentage of time in which fragment shaders were stalled on the EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
-             units="percent"
-             symbol_name="PsEuStall"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread05_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread05ReadyForDispatch"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS EU Active"
-             description="The percentage of time in which vertex shaders were processed actively on the EUs."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
              units="percent"
-             symbol_name="VsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
+             symbol_name="PSThread03ReadyForDispatch"
+             underscore_name="ps_thread03_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher"
+             symbol_name="PSThread04ReadyForDispatch"
+             underscore_name="ps_thread04_ready_for_dispatch"
              description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread04_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread04ReadyForDispatch"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS EU Stall"
-             description="The percentage of time in which vertex shaders were stalled on the EUs."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
+             symbol_name="PSThread05ReadyForDispatch"
+             underscore_name="ps_thread05_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
              units="percent"
-             symbol_name="VsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="VS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
-             data_type="uint64"
-             equation="A 13 READ A 1 READ UDIV"
-             underscore_name="vs_eu_active_per_thread"
-             units="cycles"
-             symbol_name="VsEuActivePerThread"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
+             symbol_name="PSThread06ReadyForDispatch"
+             underscore_name="ps_thread06_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
+             symbol_name="PSThread07ReadyForDispatch"
+             underscore_name="ps_thread07_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread06_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread06ReadyForDispatch"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="c6d3af7b-037b-4656-95e1-4f838f0a2c14"
        chipset="ICL"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="c6d3af7b-037b-4656-95e1-4f838f0a2c14"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="SQ is full"
-             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue_full"
-             units="percent"
-             symbol_name="GTRequestQueueFull"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="GPU_CLOCK 0 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 4 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="GPU_CLOCK 0 READ C 2 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread03_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread03ReadyForDispatch"
-             availability="$SubsliceMask 8 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 6 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread07_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread07ReadyForDispatch"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread06_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread06ReadyForDispatch"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 0 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher"
+             symbol_name="NonPSThread03ReadyForDispatch"
+             underscore_name="non_ps_thread03_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice3 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 4 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread05_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread05ReadyForDispatch"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ C 2 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher"
+             symbol_name="NonPSThread04ReadyForDispatch"
+             underscore_name="non_ps_thread04_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread04_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread04ReadyForDispatch"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher"
+             symbol_name="NonPSThread05ReadyForDispatch"
+             underscore_name="non_ps_thread05_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice5 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 4 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher"
+             symbol_name="NonPSThread06ReadyForDispatch"
+             underscore_name="non_ps_thread06_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice6 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher"
+             symbol_name="NonPSThread07ReadyForDispatch"
+             underscore_name="non_ps_thread07_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice7 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="GPU_CLOCK 0 READ C 6 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+    <counter name="SQ is full"
+             symbol_name="GTRequestQueueFull"
+             underscore_name="gt_request_queue_full"
+             description="The percentage of time when SQ is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="GPU_CLOCK 0 READ C 0 READ FSUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
-             availability="$SubsliceMask 2 AND"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set TDL_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_3"
-       hw_config_guid="fd25ec19-3ed1-40c9-8648-1d2387449a92"
        chipset="ICL"
        symbol_name="TDL_3"
+       underscore_name="tdl_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="fd25ec19-3ed1-40c9-8648-1d2387449a92"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 1"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header06_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader06ReadyPort1"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 0"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader03ReadyPort0"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 1"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader03ReadyPort1"
-             availability="$SubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 1"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort1"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 0"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort0"
-             availability="$SubsliceMask 16 AND"
              semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header06_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader06ReadyPort0"
-             availability="$SubsliceMask 64 AND"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
-             units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="$ShaderMemoryAccesses 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header07_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader07ReadyPort0"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader03ReadyPort0"
+             underscore_name="thread_header03_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader03ReadyPort1"
+             underscore_name="thread_header03_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader05ReadyPort0"
-             availability="$SubsliceMask 32 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader04ReadyPort0"
+             underscore_name="thread_header04_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 1"
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader04ReadyPort1"
+             underscore_name="thread_header04_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header07_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader07ReadyPort1"
-             availability="$SubsliceMask 128 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="$ShaderMemoryAccesses 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader05ReadyPort0"
+             underscore_name="thread_header05_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader05ReadyPort1"
+             underscore_name="thread_header05_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort1"
-             availability="$SubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader06ReadyPort0"
+             underscore_name="thread_header06_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader06ReadyPort1"
+             underscore_name="thread_header06_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 64 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader07ReadyPort0"
+             underscore_name="thread_header07_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader07ReadyPort1"
+             underscore_name="thread_header07_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
-             availability="$SubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 128 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="40dc79f2-88c8-47c6-8f86-f509e39fbe5d"
        chipset="ICL"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="40dc79f2-88c8-47c6-8f86-f509e39fbe5d"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Posh Ring Busy"
-             description="The percentage of time when posh command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="posh_engine_busy"
              units="percent"
-             symbol_name="PoshEngineBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AnyRingBusy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="Posh Ring Busy"
+             symbol_name="PoshEngineBusy"
+             underscore_name="posh_engine_busy"
+             description="The percentage of time when posh command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
+             description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TestOa"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="3c0bf614-5d67-4326-887f-a24eb8a58244"
        chipset="ICL"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3c0bf614-5d67-4326-887f-a24eb8a58244"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.6666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.3333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.3333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.16666"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.3333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.3333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.16666"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.6666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
index c6f66c5..cafe0ab 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959549" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="99c1a40e-a090-4354-86e3-4d068bb1917e"
        chipset="KBLGT2"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="99c1a40e-a090-4354-86e3-4d068bb1917e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
+             equation="B 4 READ 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
     <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
              description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
              description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
              mdapi_group="GTI/Depth Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
+             description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI RCC Throughput"
-             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Color Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="e17fc42a-e614-41b6-90c4-1074841a6c77"
        chipset="KBLGT2"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="e17fc42a-e614-41b6-90c4-1074841a6c77"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="d7a17a3a-ca71-40d2-a919-ace80d50633f"
        chipset="KBLGT2"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d7a17a3a-ca71-40d2-a919-ace80d50633f"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull shader pipeline stage was stalled."
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="57b59202-172b-477a-87de-33f85572c589"
        chipset="KBLGT2"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="57b59202-172b-477a-87de-33f85572c589"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="3addf8ef-8e9b-40f5-a448-3dbb5d5128b0"
        chipset="KBLGT2"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3addf8ef-8e9b-40f5-a448-3dbb5d5128b0"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="4af0400a-81c3-47db-a6b6-deddbd75680e"
        chipset="KBLGT2"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="4af0400a-81c3-47db-a6b6-deddbd75680e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="0e22f995-79ca-4f67-83ab-e9d9772488d8"
        chipset="KBLGT2"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="0e22f995-79ca-4f67-83ab-e9d9772488d8"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
-             data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
-             data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
              units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="OA" address="0x00002794" value="0x0000FBEF" />
         <register type="OA" address="0x00002798" value="0x0007FFFA" />
         <register type="OA" address="0x0000279C" value="0x0000FBDF" />
-    </register_config>
-    <register_config type="FLEX">
-        <register type="FLEX" address="0x0000E458" value="0x00005004" />
-        <register type="FLEX" address="0x0000E558" value="0x00000003" />
-        <register type="FLEX" address="0x0000E658" value="0x00002001" />
-        <register type="FLEX" address="0x0000E758" value="0x00101100" />
-        <register type="FLEX" address="0x0000E45C" value="0x00201200" />
-        <register type="FLEX" address="0x0000E55C" value="0x00301300" />
-        <register type="FLEX" address="0x0000E65C" value="0x00401400" />
-    </register_config>
-  </set>
-
-  <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="bc2a00f7-cb8a-4ff2-8ad0-e241dad16937"
-       chipset="KBLGT2"
-       symbol_name="HDCAndSF"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    </register_config>
+    <register_config type="FLEX">
+        <register type="FLEX" address="0x0000E458" value="0x00005004" />
+        <register type="FLEX" address="0x0000E558" value="0x00000003" />
+        <register type="FLEX" address="0x0000E658" value="0x00002001" />
+        <register type="FLEX" address="0x0000E758" value="0x00101100" />
+        <register type="FLEX" address="0x0000E45C" value="0x00201200" />
+        <register type="FLEX" address="0x0000E55C" value="0x00301300" />
+        <register type="FLEX" address="0x0000E65C" value="0x00401400" />
+    </register_config>
+  </set>
+
+  <set name="Metric set HDCAndSF"
+       chipset="KBLGT2"
+       symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="bc2a00f7-cb8a-4ff2-8ad0-e241dad16937"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="d2bbe790-f058-42d9-81c6-cdedcf655bc2"
        chipset="KBLGT2"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d2bbe790-f058-42d9-81c6-cdedcf655bc2"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="2f8e32e4-5956-46e2-af31-c8ea95887332"
        chipset="KBLGT2"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2f8e32e4-5956-46e2-af31-c8ea95887332"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="ca046aad-b5fb-4101-adce-6473ee6e5b14"
        chipset="KBLGT2"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="ca046aad-b5fb-4101-adce-6473ee6e5b14"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="605f388f-24bb-455c-88e3-8d57ae0d7e9f"
        chipset="KBLGT2"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="605f388f-24bb-455c-88e3-8d57ae0d7e9f"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="31dd157c-bf4e-4bab-bf2b-f5c8174af1af"
        chipset="KBLGT2"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="31dd157c-bf4e-4bab-bf2b-f5c8174af1af"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="105db928-5542-466b-9128-e1f3c91426cb"
        chipset="KBLGT2"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="105db928-5542-466b-9128-e1f3c91426cb"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="03db94d2-b37f-4c58-a791-0d2067b013bb"
        chipset="KBLGT2"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="03db94d2-b37f-4c58-a791-0d2067b013bb"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="aa7a3fb9-22fb-43ff-a32d-0ab6c13bbd16"
        chipset="KBLGT2"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="aa7a3fb9-22fb-43ff-a32d-0ab6c13bbd16"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
     </register_config>
   </set>
 
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="398a4268-ef6f-4ffc-b55f-3c7b5363ce61"
+  <set name="Media Vme Pipe Gen9"
        chipset="KBLGT2"
        symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="398a4268-ef6f-4ffc-b55f-3c7b5363ce61"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="6c66fe6e-2988-454a-bfae-7fca3bbcbec2"
        chipset="KBLGT2"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="6c66fe6e-2988-454a-bfae-7fca3bbcbec2"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
              description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
              description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9.5"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="baa3c7e4-52b6-4b85-801e-465a94b746dd"
        chipset="KBLGT2"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="baa3c7e4-52b6-4b85-801e-465a94b746dd"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="b49aa434-4958-4d98-9e6f-443ff27ca74d"
        chipset="KBLGT2"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="b49aa434-4958-4d98-9e6f-443ff27ca74d"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index 0d90451..de27ac1 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959549" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="17b4f3e0-d578-4ae3-b7a8-98d756d1e0df"
        chipset="KBLGT3"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="17b4f3e0-d578-4ae3-b7a8-98d756d1e0df"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ B 5 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
+             equation="B 4 READ B 5 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
              description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
              mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="9823aaa1-b06f-40ce-884b-cd798c79f0c2"
        chipset="KBLGT3"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="9823aaa1-b06f-40ce-884b-cd798c79f0c2"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="c7c735f3-ce58-45cf-aa04-30b183f1faff"
        chipset="KBLGT3"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="c7c735f3-ce58-45cf-aa04-30b183f1faff"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="VsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
     <counter name="Early Depth Bottleneck"
-             low_watermark="10"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
              description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="96ec2219-040b-428a-856a-6bc03363a057"
        chipset="KBLGT3"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="96ec2219-040b-428a-856a-6bc03363a057"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
-             semantic_type="event"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
              description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 5 READ"
              mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="03372b64-4996-4d3b-aa18-790e75eeb9c2"
        chipset="KBLGT3"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="03372b64-4996-4d3b-aa18-790e75eeb9c2"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             units="messages"
+             semantic_type="event"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="31b4ce5a-bd61-4c1f-bb5d-f2e731412150"
        chipset="KBLGT3"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="31b4ce5a-bd61-4c1f-bb5d-f2e731412150"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 7 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="2ce0911a-27fc-4887-96f0-11084fa807c3"
        chipset="KBLGT3"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="2ce0911a-27fc-4887-96f0-11084fa807c3"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
-             data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
-             units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
-             />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
-             data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
-             units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
-             />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
-             data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
-             units="messages"
-             symbol_name="SamplerAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="546c4c1d-99b8-42fb-a107-5aaabb5314a8"
        chipset="KBLGT3"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="546c4c1d-99b8-42fb-a107-5aaabb5314a8"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
              symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss0)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="4e93d156-9b39-4268-8544-a8e0480806d7"
        chipset="KBLGT3"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="4e93d156-9b39-4268-8544-a8e0480806d7"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="de1bec86-ca92-4b43-89fa-147653221cc0"
        chipset="KBLGT3"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="de1bec86-ca92-4b43-89fa-147653221cc0"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="e63537bb-10be-4d4a-92c4-c6b0c65e02ef"
        chipset="KBLGT3"
-       symbol_name="L3_3"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+       symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="e63537bb-10be-4d4a-92c4-c6b0c65e02ef"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="7a03a9f8-ec5e-46bb-8b67-1f0ff1476281"
        chipset="KBLGT3"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="7a03a9f8-ec5e-46bb-8b67-1f0ff1476281"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
              units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="b25d2ebf-a6e0-4b29-96be-a9b010edeeda"
        chipset="KBLGT3"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b25d2ebf-a6e0-4b29-96be-a9b010edeeda"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
              units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
              description="The percentage of time in which slice0 subslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
              units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
              description="The percentage of time in which slice0 subslice1 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
              units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="469a05e5-e299-46f7-9598-7b05f3c34991"
        chipset="KBLGT3"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="469a05e5-e299-46f7-9598-7b05f3c34991"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="52f925c6-786a-4ec6-86ce-cba85c83453a"
        chipset="KBLGT3"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="52f925c6-786a-4ec6-86ce-cba85c83453a"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
+             max_equation="100"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="efc497ac-884e-4ee4-a4a8-15fba22aaf21"
        chipset="KBLGT3"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="efc497ac-884e-4ee4-a4a8-15fba22aaf21"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009888" value="0x02B14000" />
         <register type="NOA" address="0x00009888" value="0x02B20033" />
         <register type="NOA" address="0x00009888" value="0x00B20000" />
-        <register type="NOA" address="0x00009888" value="0x02B31000" />
-        <register type="NOA" address="0x00009888" value="0x00D08000" />
-        <register type="NOA" address="0x00009888" value="0x00D18000" />
-        <register type="NOA" address="0x00009888" value="0x00D21980" />
-        <register type="NOA" address="0x00009888" value="0x00D34000" />
-        <register type="NOA" address="0x00009888" value="0x1190FC00" />
-        <register type="NOA" address="0x00009888" value="0x37900000" />
-        <register type="NOA" address="0x00009888" value="0x51900000" />
-        <register type="NOA" address="0x00009888" value="0x41900C00" />
-        <register type="NOA" address="0x00009888" value="0x43900002" />
-        <register type="NOA" address="0x00009888" value="0x53900420" />
-        <register type="NOA" address="0x00009888" value="0x459000A1" />
-        <register type="NOA" address="0x00009888" value="0x33900000" />
-    </register_config>
-  </set>
-
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="bfd9764d-2c5b-4c16-bfc1-89de3ca10917"
-       chipset="KBLGT3"
-       symbol_name="VMEPipe"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
+        <register type="NOA" address="0x00009888" value="0x02B31000" />
+        <register type="NOA" address="0x00009888" value="0x00D08000" />
+        <register type="NOA" address="0x00009888" value="0x00D18000" />
+        <register type="NOA" address="0x00009888" value="0x00D21980" />
+        <register type="NOA" address="0x00009888" value="0x00D34000" />
+        <register type="NOA" address="0x00009888" value="0x1190FC00" />
+        <register type="NOA" address="0x00009888" value="0x37900000" />
+        <register type="NOA" address="0x00009888" value="0x51900000" />
+        <register type="NOA" address="0x00009888" value="0x41900C00" />
+        <register type="NOA" address="0x00009888" value="0x43900002" />
+        <register type="NOA" address="0x00009888" value="0x53900420" />
+        <register type="NOA" address="0x00009888" value="0x459000A1" />
+        <register type="NOA" address="0x00009888" value="0x33900000" />
+    </register_config>
+  </set>
+
+  <set name="Media Vme Pipe Gen9"
+       chipset="KBLGT3"
+       symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="bfd9764d-2c5b-4c16-bfc1-89de3ca10917"
+       >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="b55ecba1-2aa9-422e-89ff-b9e30f03d447"
        chipset="KBLGT3"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="b55ecba1-2aa9-422e-89ff-b9e30f03d447"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AnyRingBusy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
+             description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="f1792f32-6db2-4b50-b4b2-557128f1688d"
        chipset="KBLGT3"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f1792f32-6db2-4b50-b4b2-557128f1688d"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="85bc2e4f-2563-4388-921b-dc0dad879cf3"
        chipset="KBLGT3"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="85bc2e4f-2563-4388-921b-dc0dad879cf3"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index 4fe6f87..83c290e 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959547" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="07b25942-d9fd-4fce-bd58-e29abd66b7de"
        chipset="SKLGT2"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="07b25942-d9fd-4fce-bd58-e29abd66b7de"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
+             equation="B 4 READ 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
     <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
              description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
              description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
              mdapi_group="GTI/Depth Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
+             description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI RCC Throughput"
-             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Color Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA"
                      availability="$SkuRevisionId 0x02 UGTE"
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="00b80b4c-d215-4378-9015-da3dda3b61ea"
        chipset="SKLGT2"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="00b80b4c-d215-4378-9015-da3dda3b61ea"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA"
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="2a0c0933-37e7-427c-9951-ded42a78bb27"
        chipset="SKLGT2"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2a0c0933-37e7-427c-9951-ded42a78bb27"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull shader pipeline stage was stalled."
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA"
                      availability="$SkuRevisionId 0x02 ULT"
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="246b35f1-44e0-4d03-8936-e452e291d064"
        chipset="SKLGT2"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="246b35f1-44e0-4d03-8936-e452e291d064"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA"
                      availability="$SliceMask 0x01 AND $SkuRevisionId 0x02 ULT &amp;&amp;"
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="e0d3da02-00bf-4a96-9795-b48158c73a68"
        chipset="SKLGT2"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="e0d3da02-00bf-4a96-9795-b48158c73a68"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA"
                      availability="$SliceMask 0x01 AND $SkuRevisionId 0x02 ULT &amp;&amp;"
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="c26b1fda-2752-4a33-a448-4c8718366846"
        chipset="SKLGT2"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="c26b1fda-2752-4a33-a448-4c8718366846"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA"
                      availability="$SubsliceMask 0x01 AND"
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="9fb22842-e708-43f7-9752-e0e41670c39e"
        chipset="SKLGT2"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="9fb22842-e708-43f7-9752-e0e41670c39e"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
-             data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
-             data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
              units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="OA" address="0x00002794" value="0x0000FBEF" />
         <register type="OA" address="0x00002798" value="0x0007FFFA" />
         <register type="OA" address="0x0000279C" value="0x0000FBDF" />
-    </register_config>
-    <register_config type="FLEX">
-        <register type="FLEX" address="0x0000E458" value="0x00005004" />
-        <register type="FLEX" address="0x0000E558" value="0x00000003" />
-        <register type="FLEX" address="0x0000E658" value="0x00002001" />
-        <register type="FLEX" address="0x0000E758" value="0x00101100" />
-        <register type="FLEX" address="0x0000E45C" value="0x00201200" />
-        <register type="FLEX" address="0x0000E55C" value="0x00301300" />
-        <register type="FLEX" address="0x0000E65C" value="0x00401400" />
-    </register_config>
-  </set>
-
-  <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="5378e2a1-4248-4188-a4ae-da25a794c603"
-       chipset="SKLGT2"
-       symbol_name="HDCAndSF"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    </register_config>
+    <register_config type="FLEX">
+        <register type="FLEX" address="0x0000E458" value="0x00005004" />
+        <register type="FLEX" address="0x0000E558" value="0x00000003" />
+        <register type="FLEX" address="0x0000E658" value="0x00002001" />
+        <register type="FLEX" address="0x0000E758" value="0x00101100" />
+        <register type="FLEX" address="0x0000E45C" value="0x00201200" />
+        <register type="FLEX" address="0x0000E55C" value="0x00301300" />
+        <register type="FLEX" address="0x0000E65C" value="0x00401400" />
+    </register_config>
+  </set>
+
+  <set name="Metric set HDCAndSF"
+       chipset="SKLGT2"
+       symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5378e2a1-4248-4188-a4ae-da25a794c603"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="f42cdd6a-b000-42cb-870f-5eb423a7f514"
        chipset="SKLGT2"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f42cdd6a-b000-42cb-870f-5eb423a7f514"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="b9bf2423-d88c-4a7b-a051-627611d00dcc"
        chipset="SKLGT2"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b9bf2423-d88c-4a7b-a051-627611d00dcc"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="2414a93d-d84f-406e-99c0-472161194b40"
        chipset="SKLGT2"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2414a93d-d84f-406e-99c0-472161194b40"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="53a45d2d-170b-4cf5-b7bb-585120c8e2f5"
        chipset="SKLGT2"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="53a45d2d-170b-4cf5-b7bb-585120c8e2f5"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="b4cff514-a91e-4798-a0b3-426ca13fc9c1"
        chipset="SKLGT2"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b4cff514-a91e-4798-a0b3-426ca13fc9c1"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
-             units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Slice0 Subslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice0 Subslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 subslice0 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 subslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x000000A0" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="7821d13b-9b8b-4405-9618-78cd56b62cce"
        chipset="SKLGT2"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="7821d13b-9b8b-4405-9618-78cd56b62cce"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="893f1a4d-919d-4388-8cb7-746d73ea7259"
        chipset="SKLGT2"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="893f1a4d-919d-4388-8cb7-746d73ea7259"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="41a24047-7484-4ead-ae37-de907e5ff2b2"
        chipset="SKLGT2"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="41a24047-7484-4ead-ae37-de907e5ff2b2"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
     </register_config>
   </set>
 
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="95910492-943f-44bd-9461-390240f243fd"
+  <set name="Media Vme Pipe Gen9"
        chipset="SKLGT2"
        symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="95910492-943f-44bd-9461-390240f243fd"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="c44a5cf7-886d-477b-bebd-2d738923e4c3"
        chipset="SKLGT2"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="c44a5cf7-886d-477b-bebd-2d738923e4c3"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
              description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
              description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="1651949f-0ac0-4cb1-a06f-dafd74a407d1"
        chipset="SKLGT2"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="1651949f-0ac0-4cb1-a06f-dafd74a407d1"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="27871149-2fa9-40ba-aa73-350d60c03a09"
        chipset="SKLGT2"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="27871149-2fa9-40ba-aa73-350d60c03a09"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index 7fc5e8d..46b0954 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959548" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="21fef15a-83f4-4ffa-bb81-7da6e38b8e4b"
        chipset="SKLGT3"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="21fef15a-83f4-4ffa-bb81-7da6e38b8e4b"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
-             data_type="uint64"
-             equation="B 4 READ B 5 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler1_bottleneck"
              units="percent"
-             symbol_name="Sampler1Bottleneck"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
              description="The percentage of time in which Sampler 0 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
              description="The percentage of time in which Sampler 1 has been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
              mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
              description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
              mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Bottleneck"
+             symbol_name="Sampler1Bottleneck"
+             underscore_name="sampler1_bottleneck"
+             description="The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
+             equation="B 4 READ B 5 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
     <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
              description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
              units="messages"
-             symbol_name="L3Lookups"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
              description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
              mdapi_group="GTI/Depth Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
+             description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI RCC Throughput"
-             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Color Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck $Sampler1Bottleneck FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="4320492b-fd03-42ac-922f-dbe1ef3b7b58"
        chipset="SKLGT3"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="4320492b-fd03-42ac-922f-dbe1ef3b7b58"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 27 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="UntypedBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
     <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
              description="The total number of typed memory bytes read via Data Port."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
              units="bytes"
-             symbol_name="TypedBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="bd2d9cae-b9ec-4f5b-9d2f-934bed398a2d"
        chipset="SKLGT3"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="bd2d9cae-b9ec-4f5b-9d2f-934bed398a2d"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
+             max_equation="100"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
              units="percent"
-             symbol_name="SfStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
     <counter name="SO Bottleneck"
-             low_watermark="5"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
              description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="5"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Early Depth Bottleneck"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
+             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
-    <counter name="Early Depth Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
              data_type="float"
-             high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="4ca0f3fe-7fd3-4924-98cb-1807d9879767"
        chipset="SKLGT3"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="4ca0f3fe-7fd3-4924-98cb-1807d9879767"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
              underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 31 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
              units="messages"
-             symbol_name="GtiL3Bank1Reads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
              units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
              units="messages"
-             symbol_name="GtiL3Bank2Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
+             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
+             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="GtiMscMemoryReads"
-             description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
-    <counter name="GtiStcMemoryReads"
-             description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="a0c0172c-ee13-403d-99ff-2bdf6936cf14"
        chipset="SKLGT3"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="a0c0172c-ee13-403d-99ff-2bdf6936cf14"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="52435e0b-f188-42ea-8680-21a56ee20dee"
        chipset="SKLGT3"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="52435e0b-f188-42ea-8680-21a56ee20dee"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+             data_type="uint64"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
              units="messages"
-             symbol_name="TypedAtomics0"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
              description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
              units="messages"
-             symbol_name="EuUntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
              units="messages"
-             symbol_name="UntypedWrites0"
              semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
+             description="The subslice 0 EU Typed Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
              description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
              units="messages"
-             symbol_name="EuUntypedAtomics0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
              units="messages"
-             symbol_name="EuUntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
+             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 6 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
              description="The subslice 0 EU A64 Untyped Writes subslice 0."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
              units="messages"
-             symbol_name="EuA64UntypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedWrites0"
-             description="The subslice 0 EU Typed Writes subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="C 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
-             data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
-             units="messages"
-             symbol_name="TypedReads0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EuA64UntypedReads0"
-             description="The subslice 0 EU A64 Untyped Reads subslice 0."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
+             description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
-             data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
-             units="messages"
-             symbol_name="EuTypedReads0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
              description="The ratio of EU untyped read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="TypedReadsPerCacheLine"
-             description="The ratio of EU typed read requests to L3 cache line reads."
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="27076eeb-49f3-4fed-8423-c66506005c63"
        chipset="SKLGT3"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="27076eeb-49f3-4fed-8423-c66506005c63"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
-             data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
-             units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
-             />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
-             data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
-             units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
-             />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
-             data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
-             units="messages"
-             symbol_name="SamplerAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
              description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
+             />
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
              mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
-             units="bytes"
-             symbol_name="L3TotalThroughput"
-             semantic_type="throughput"
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="8071b409-c39a-4674-94d7-32962ecfb512"
        chipset="SKLGT3"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="8071b409-c39a-4674-94d7-32962ecfb512"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
              symbol_name="VsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss0)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="5e0b391e-9ea8-4901-b2ff-b64ff616c7ed"
        chipset="SKLGT3"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="5e0b391e-9ea8-4901-b2ff-b64ff616c7ed"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="25dc828e-1d2d-426e-9546-a1d4233cdf16"
        chipset="SKLGT3"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="25dc828e-1d2d-426e-9546-a1d4233cdf16"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="3dba9405-2d7e-4d70-8199-e734e82fd6bf"
        chipset="SKLGT3"
-       symbol_name="L3_3"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
+       symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3dba9405-2d7e-4d70-8199-e734e82fd6bf"
+       >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
              description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="76935d7b-09c9-46bf-87f1-c18b4a86ebe5"
        chipset="SKLGT3"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="76935d7b-09c9-46bf-87f1-c18b4a86ebe5"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
              units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="1b34c0d6-4f4c-4d7b-833f-4aaf236d87a6"
        chipset="SKLGT3"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="1b34c0d6-4f4c-4d7b-833f-4aaf236d87a6"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
              units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
     <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
              description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
              description="The percentage of time in which slice0 subslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
              units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
              description="The percentage of time in which slice0 subslice1 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
              units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="b375c985-9953-455b-bda2-b03f7594e9db"
        chipset="SKLGT3"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="b375c985-9953-455b-bda2-b03f7594e9db"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="3e2be2bb-884a-49bb-82c5-2358e6bd5f2d"
        chipset="SKLGT3"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3e2be2bb-884a-49bb-82c5-2358e6bd5f2d"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
+             max_equation="100"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="2d80a648-7b5a-4e92-bbe7-3b5c76f2e221"
        chipset="SKLGT3"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="2d80a648-7b5a-4e92-bbe7-3b5c76f2e221"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active including Ext Math"
+             symbol_name="Fpu1ActiveAdjusted"
+             underscore_name="fpu1_active_adjusted"
              description="The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active_adjusted"
              units="percent"
-             symbol_name="Fpu1ActiveAdjusted"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             equation="A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009888" value="0x02B14000" />
         <register type="NOA" address="0x00009888" value="0x02B20033" />
         <register type="NOA" address="0x00009888" value="0x00B20000" />
-        <register type="NOA" address="0x00009888" value="0x02B31000" />
-        <register type="NOA" address="0x00009888" value="0x00D08000" />
-        <register type="NOA" address="0x00009888" value="0x00D18000" />
-        <register type="NOA" address="0x00009888" value="0x00D21980" />
-        <register type="NOA" address="0x00009888" value="0x00D34000" />
-        <register type="NOA" address="0x00009888" value="0x1190FC00" />
-        <register type="NOA" address="0x00009888" value="0x37900000" />
-        <register type="NOA" address="0x00009888" value="0x51900000" />
-        <register type="NOA" address="0x00009888" value="0x41900C00" />
-        <register type="NOA" address="0x00009888" value="0x43900402" />
-        <register type="NOA" address="0x00009888" value="0x53901550" />
-        <register type="NOA" address="0x00009888" value="0x45900080" />
-        <register type="NOA" address="0x00009888" value="0x33900000" />
-    </register_config>
-  </set>
-
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="cfae9232-6ffc-42cc-a703-9790016925f0"
-       chipset="SKLGT3"
-       symbol_name="VMEPipe"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
+        <register type="NOA" address="0x00009888" value="0x02B31000" />
+        <register type="NOA" address="0x00009888" value="0x00D08000" />
+        <register type="NOA" address="0x00009888" value="0x00D18000" />
+        <register type="NOA" address="0x00009888" value="0x00D21980" />
+        <register type="NOA" address="0x00009888" value="0x00D34000" />
+        <register type="NOA" address="0x00009888" value="0x1190FC00" />
+        <register type="NOA" address="0x00009888" value="0x37900000" />
+        <register type="NOA" address="0x00009888" value="0x51900000" />
+        <register type="NOA" address="0x00009888" value="0x41900C00" />
+        <register type="NOA" address="0x00009888" value="0x43900402" />
+        <register type="NOA" address="0x00009888" value="0x53901550" />
+        <register type="NOA" address="0x00009888" value="0x45900080" />
+        <register type="NOA" address="0x00009888" value="0x33900000" />
+    </register_config>
+  </set>
+
+  <set name="Media Vme Pipe Gen9"
+       chipset="SKLGT3"
+       symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="cfae9232-6ffc-42cc-a703-9790016925f0"
+       >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="0c5058ff-fdf4-4e0d-81fb-c0310fb76525"
        chipset="SKLGT3"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="0c5058ff-fdf4-4e0d-81fb-c0310fb76525"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AnyRingBusy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
+             description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="2b985803-d3c9-4629-8a4f-634bfecba0e8"
        chipset="SKLGT3"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="2b985803-d3c9-4629-8a4f-634bfecba0e8"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="c889fd93-8dc7-4ba5-9451-de34a8b5ea3f"
        chipset="SKLGT3"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="c889fd93-8dc7-4ba5-9451-de34a8b5ea3f"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index 30a1d17..d8ed378 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1553959548" merge_md5="">
   <set name="Render Metrics Basic Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="95322a71-bb05-4437-bc27-f7dd7b27d136"
        chipset="SKLGT4"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="95322a71-bb05-4437-bc27-f7dd7b27d136"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="cycles"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Misses 64 UMUL"
-             underscore_name="gti_l3_throughput"
-             units="bytes"
-             symbol_name="GtiL3Throughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Cache Misses"
-             description="The total number of sampler cache misses in all LODs in all sampler units."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 4 READ B 5 READ UADD B 3 READ UADD 8 UMUL"
-             underscore_name="sampler_l1_misses"
-             units="messages"
-             symbol_name="SamplerL1Misses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$SamplerL1Misses 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Sampler 0 Busy"
-             description="The percentage of time in which Sampler 0 has been processing EU requests."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler0_busy"
              units="percent"
-             symbol_name="Sampler0Busy"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler 1 Busy"
-             description="The percentage of time in which Sampler 1 has been processing EU requests."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler1_busy"
              units="percent"
-             symbol_name="Sampler1Busy"
-             availability="$SubsliceMask 0x12 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samplers Busy"
-             description="The percentage of time in which samplers have been processing EU requests."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="$Sampler0Busy $Sampler1Busy FMAX"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Fixed Pipe Throughput"
-             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="B 6 READ B 7 READ UADD 64 UMUL"
-             underscore_name="gti_vf_throughput"
-             units="bytes"
-             symbol_name="GtiVfThroughput"
-             semantic_type="throughput"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/3D Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler 0 Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
-             high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="sampler0_bottleneck"
              units="percent"
-             symbol_name="Sampler0Bottleneck"
-             availability="$SubsliceMask 0x09 AND"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Sampler 0 Busy"
+             symbol_name="Sampler0Busy"
+             underscore_name="sampler0_busy"
+             description="The percentage of time in which Sampler 0 has been processing EU requests."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Sampler 1 Busy"
+             symbol_name="Sampler1Busy"
+             underscore_name="sampler1_busy"
+             description="The percentage of time in which Sampler 1 has been processing EU requests."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x12 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+    <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
+             description="The percentage of time in which samplers have been processing EU requests."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Busy $Sampler1Busy FMAX"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Sampler 0 Bottleneck"
+             symbol_name="Sampler0Bottleneck"
+             underscore_name="sampler0_bottleneck"
+             description="The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x09 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="L3 Lookup Accesses w/o IC"
-             description="The total number of L3 cache lookup accesses w/o IC."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
-             underscore_name="l3_lookups"
-             units="messages"
-             symbol_name="L3Lookups"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+             units="texels"
+             semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Sampler Cache Misses"
+             symbol_name="SamplerL1Misses"
+             underscore_name="sampler_l1_misses"
+             description="The total number of sampler cache misses in all LODs in all sampler units."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ B 5 READ UADD B 3 READ UADD 8 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Depth Throughput"
-             description="The total number of GPU memory bytes transferred between depth caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 0 READ C 1 READ UADD 64 UMUL"
-             underscore_name="gti_depth_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiDepthThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Depth Cache"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Lookup Accesses w/o IC"
+             symbol_name="L3Lookups"
+             underscore_name="l3_lookups"
+             description="The total number of L3 cache lookup accesses w/o IC."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="$SamplerL1Misses $ShaderMemoryAccesses UADD"
+             mdapi_group="L3/TAG"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$SamplerL1Misses 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
-             data_type="float"
-             high_watermark="15"
-             equation="$Sampler0Bottleneck"
-             max_equation="100"
-             underscore_name="sampler_bottleneck"
-             units="percent"
-             symbol_name="SamplerBottleneck"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI HDC TLB Lookup Throughput"
-             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
+    <counter name="GTI Fixed Pipe Throughput"
+             symbol_name="GtiVfThroughput"
+             underscore_name="gti_vf_throughput"
+             description="The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_hdc_lookups_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="GtiHdcLookupsThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
+             equation="B 6 READ B 7 READ UADD 64 UMUL"
+             mdapi_group="GTI/3D Pipe"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI RCC Throughput"
-             description="The total number of GPU memory bytes transferred between render color caches and GTI."
+    <counter name="GTI Depth Throughput"
+             symbol_name="GtiDepthThroughput"
+             underscore_name="gti_depth_throughput"
+             description="The total number of GPU memory bytes transferred between depth caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 2 READ C 3 READ UADD 64 UMUL"
-             underscore_name="gti_rcc_throughput"
              units="bytes"
-             symbol_name="GtiRccThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ C 1 READ UADD 64 UMUL"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/Color Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI RCC Throughput"
+             symbol_name="GtiRccThroughput"
+             underscore_name="gti_rcc_throughput"
+             description="The total number of GPU memory bytes transferred between render color caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="C 2 READ C 3 READ UADD 64 UMUL"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             equation="$L3Misses 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI HDC TLB Lookup Throughput"
+             symbol_name="GtiHdcLookupsThroughput"
+             underscore_name="gti_hdc_lookups_throughput"
+             description="The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 6 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="$Sampler0Bottleneck"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Basic Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="7277228f-e7f3-4743-945a-6a2049d11377"
        chipset="SKLGT4"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="7277228f-e7f3-4743-945a-6a2049d11377"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_read"
-             units="bytes"
-             symbol_name="UntypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Typed Bytes Written"
-             description="The total number of untyped memory bytes written via Data Port."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_written"
-             units="bytes"
-             symbol_name="TypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
-             units="percent"
-             symbol_name="Fpu0Active"
-             semantic_type="duration"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU AVG IPC Rate"
-             description="The average rate of IPC calculated for 2 FPU pipelines."
-             data_type="float"
-             max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
-             units="number"
-             symbol_name="EuAvgIpcRate"
-             semantic_type="ratio"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
+    <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
+             description="The average rate of IPC calculated for 2 FPU pipelines."
+             data_type="float"
+             max_equation="2"
+             units="number"
+             semantic_type="ratio"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes"
-             description="The total number of untyped memory bytes written via Data Port."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="untyped_bytes_written"
-             units="bytes"
-             symbol_name="UntypedBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="Typed Bytes Read"
-             description="The total number of typed memory bytes read via Data Port."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
-             underscore_name="typed_bytes_read"
-             units="bytes"
-             symbol_name="TypedBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO OCL BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texel lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 5 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Typed Bytes Read"
+             symbol_name="TypedBytesRead"
+             underscore_name="typed_bytes_read"
+             description="The total number of typed memory bytes read via Data Port."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Typed Bytes Written"
+             symbol_name="TypedBytesWritten"
+             underscore_name="typed_bytes_written"
+             description="The total number of typed memory bytes written via Data Port."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Untyped Bytes Read"
+             symbol_name="UntypedBytesRead"
+             underscore_name="untyped_bytes_read"
+             description="The total number of untyped memory bytes read via Data Port."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Untyped Writes"
+             symbol_name="UntypedBytesWritten"
+             underscore_name="untyped_bytes_written"
+             description="The total number of untyped memory bytes written via Data Port."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 4 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO OCL BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 32 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="C 5 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="463c668c-3f60-49b6-8f85-d995b635b3b2"
        chipset="SKLGT4"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="463c668c-3f60-49b6-8f85-d995b635b3b2"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
-             high_watermark="15"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hi_depth_bottleneck"
+             max_equation="100"
              units="percent"
-             symbol_name="HiDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="bc_bottleneck"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
-             data_type="float"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
-             units="percent"
-             symbol_name="HsStall"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Hull Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             high_watermark="15"
              data_type="float"
-             high_watermark="10"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_bottleneck"
              units="percent"
-             symbol_name="SfBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
-             data_type="float"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
-             units="percent"
-             symbol_name="SfStall"
-             semantic_type="duration"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Bottleneck"
-             low_watermark="3"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
              description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="3"
              high_watermark="9"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Clipper"
              />
-    <counter name="SO Bottleneck"
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="SO Bottleneck"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
+             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Clipper Bottleneck"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
+             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Clipper Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
              data_type="float"
-             high_watermark="30"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Clipper"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier4 Correlate Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Domain Shader"
              />
     <counter name="Early Depth Bottleneck"
-             low_watermark="10"
+             symbol_name="EarlyDepthBottleneck"
+             underscore_name="early_depth_bottleneck"
              description="The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="early_depth_bottleneck"
+             data_type="float"
              units="percent"
-             symbol_name="EarlyDepthBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
              description="The percentage of time in which stream-output pipeline stage was stalled."
              data_type="float"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
              mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Stream Output"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
+             data_type="float"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier4 Correlate Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Reads Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_reads"
-       hw_config_guid="3ae6e74c-72c3-4040-9bd0-7961430b8cc8"
        chipset="SKLGT4"
        symbol_name="MemoryReads"
+       underscore_name="memory_reads"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="3ae6e74c-72c3-4040-9bd0-7961430b8cc8"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank0Reads"
-             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_reads"
-             units="messages"
-             symbol_name="GtiL3Bank0Reads"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all accesses from GTI to the ring."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="GtiL3Bank3Reads"
-             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_reads"
-             units="messages"
-             symbol_name="GtiL3Bank3Reads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiRsMemoryReads"
-             description="The total number of GTI memory reads from Resource Streamer."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_rs_memory_reads"
-             units="messages"
-             symbol_name="GtiRsMemoryReads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Resource Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiHizMemoryReads"
-             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_reads"
-             units="messages"
-             symbol_name="GtiHizMemoryReads"
+             units="pixels"
              semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 29 READ 4 UMUL"
              mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="GtiRccMemoryReads"
-             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_reads"
-             units="messages"
-             symbol_name="GtiRccMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiL3Bank1Reads"
-             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_reads"
-             units="messages"
-             symbol_name="GtiL3Bank1Reads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GtiCmdStreamerMemoryReads"
-             description="The total number of GTI memory reads from Command Streamer."
-             data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_reads"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryReads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
-             />
-    <counter name="GtiL3Bank2Reads"
-             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
-             data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_reads"
-             units="messages"
-             symbol_name="GtiL3Bank2Reads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="GTI/L3"
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiMemoryReads"
-             description="The total number of GTI memory reads."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_reads"
              units="messages"
-             symbol_name="GtiMemoryReads"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="GtiRczMemoryReads"
-             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_reads"
              units="messages"
-             symbol_name="GtiRczMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiCmdStreamerMemoryReads"
+             symbol_name="GtiCmdStreamerMemoryReads"
+             underscore_name="gti_cmd_streamer_memory_reads"
+             description="The total number of GTI memory reads from Command Streamer."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiRsMemoryReads"
+             symbol_name="GtiRsMemoryReads"
+             underscore_name="gti_rs_memory_reads"
+             description="The total number of GTI memory reads from Resource Streamer."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Resource Streamer"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiVfMemoryReads"
+             symbol_name="GtiVfMemoryReads"
+             underscore_name="gti_vf_memory_reads"
+             description="The total number of GTI memory reads from Vertex Fetch."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GTI/3D Pipe/Vertex Fetch"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRccMemoryReads"
+             symbol_name="GtiRccMemoryReads"
+             underscore_name="gti_rcc_memory_reads"
+             description="The total number of GTI memory reads from Render Color Cache (Render Color Cache misses)."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="GtiMscMemoryReads"
+             symbol_name="GtiMscMemoryReads"
+             underscore_name="gti_msc_memory_reads"
              description="The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses)."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_reads"
              units="messages"
-             symbol_name="GtiMscMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiVfMemoryReads"
-             description="The total number of GTI memory reads from Vertex Fetch."
+    <counter name="GtiHizMemoryReads"
+             symbol_name="GtiHizMemoryReads"
+             underscore_name="gti_hiz_memory_reads"
+             description="The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses)."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="gti_vf_memory_reads"
              units="messages"
-             symbol_name="GtiVfMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Vertex Fetch"
              />
     <counter name="GtiStcMemoryReads"
+             symbol_name="GtiStcMemoryReads"
+             underscore_name="gti_stc_memory_reads"
              description="The total number of GTI memory reads from Stencil Cache (Stencil Cache misses)."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_reads"
              units="messages"
-             symbol_name="GtiStcMemoryReads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GtiRczMemoryReads"
+             symbol_name="GtiRczMemoryReads"
+             underscore_name="gti_rcz_memory_reads"
+             description="The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 7 READ"
              mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryReads"
+             symbol_name="GtiMemoryReads"
+             underscore_name="gti_memory_reads"
+             description="The total number of GTI memory reads."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Reads"
+             symbol_name="GtiL3Bank0Reads"
+             underscore_name="gti_l3_bank0_reads"
+             description="The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank1Reads"
+             symbol_name="GtiL3Bank1Reads"
+             underscore_name="gti_l3_bank1_reads"
+             description="The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="GtiL3Bank2Reads"
+             symbol_name="GtiL3Bank2Reads"
+             underscore_name="gti_l3_bank2_reads"
+             description="The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="GtiL3Bank3Reads"
+             symbol_name="GtiL3Bank3Reads"
+             underscore_name="gti_l3_bank3_reads"
+             description="The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="GtiL3Reads"
+             symbol_name="GtiL3Reads"
+             underscore_name="gti_l3_reads"
              description="The total number of GTI memory reads from L3 (L3 Cache misses)."
              data_type="uint64"
-             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
-             underscore_name="gti_l3_reads"
              units="messages"
-             symbol_name="GtiL3Reads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all accesses from GTI to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Memory Writes Distribution Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="memory_writes"
-       hw_config_guid="055f256d-4052-467c-8dec-6064a4806433"
        chipset="SKLGT4"
        symbol_name="MemoryWrites"
+       underscore_name="memory_writes"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="055f256d-4052-467c-8dec-6064a4806433"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GtiMemoryWrites"
-             description="The total number of GTI memory writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="gti_memory_writes"
-             units="messages"
-             symbol_name="GtiMemoryWrites"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GtiRingAccesses"
-             description="The total number of all GTI accesses to the ring."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="C 3 READ 2 UMUL"
-             underscore_name="gti_ring_accesses"
-             units="messages"
-             symbol_name="GtiRingAccesses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="GtiMscMemoryWrites"
-             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="gti_msc_memory_writes"
-             units="messages"
-             symbol_name="GtiMscMemoryWrites"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="GtiCmdStreamerMemoryWrites"
-             description="The total number of GTI memory writes from Command Streamer."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="gti_cmd_streamer_memory_writes"
-             units="messages"
-             symbol_name="GtiCmdStreamerMemoryWrites"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Command Streamer"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GtiL3Bank0Writes"
-             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="gti_l3_bank0_writes"
-             units="messages"
-             symbol_name="GtiL3Bank0Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank1Writes"
-             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 5 READ"
-             underscore_name="gti_l3_bank1_writes"
-             units="messages"
-             symbol_name="GtiL3Bank1Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank2Writes"
-             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="C 6 READ"
-             underscore_name="gti_l3_bank2_writes"
-             units="messages"
-             symbol_name="GtiL3Bank2Writes"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Bank3Writes"
-             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="gti_l3_bank3_writes"
-             units="messages"
-             symbol_name="GtiL3Bank3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GtiL3Writes"
-             description="The total number of GTI memory writes from L3 (L3 invalidations)."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
-             underscore_name="gti_l3_writes"
-             units="messages"
-             symbol_name="GtiL3Writes"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/L3"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiRccMemoryWrites"
-             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="gti_rcc_memory_writes"
              units="messages"
-             symbol_name="GtiRccMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Color Cache"
              />
-    <counter name="GtiSoMemoryWrites"
-             description="The total number of GTI memory writes from Stream Output."
+    <counter name="GtiCmdStreamerMemoryWrites"
+             symbol_name="GtiCmdStreamerMemoryWrites"
+             underscore_name="gti_cmd_streamer_memory_writes"
+             description="The total number of GTI memory writes from Command Streamer."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="gti_so_memory_writes"
              units="messages"
-             symbol_name="GtiSoMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 0 READ"
+             mdapi_group="GTI/3D Pipe/Command Streamer"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/3D Pipe/Stream Output"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GtiStcMemoryWrites"
-             description="The total number of GTI memory writes from Stencil Cache."
+    <counter name="GtiSoMemoryWrites"
+             symbol_name="GtiSoMemoryWrites"
+             underscore_name="gti_so_memory_writes"
+             description="The total number of GTI memory writes from Stream Output."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="gti_stc_memory_writes"
              units="messages"
-             symbol_name="GtiStcMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ"
+             mdapi_group="GTI/3D Pipe/Stream Output"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="GtiRccMemoryWrites"
+             symbol_name="GtiRccMemoryWrites"
+             underscore_name="gti_rcc_memory_writes"
+             description="The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations)."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
+             equation="B 3 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="GtiMscMemoryWrites"
+             symbol_name="GtiMscMemoryWrites"
+             underscore_name="gti_msc_memory_writes"
+             description="The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations)."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="B 4 READ"
+             mdapi_group="GTI/Color Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="GtiHizMemoryWrites"
+             symbol_name="GtiHizMemoryWrites"
+             underscore_name="gti_hiz_memory_writes"
+             description="The total number of GTI memory writes from Hierarchical Depth Cache."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="B 5 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="GtiStcMemoryWrites"
+             symbol_name="GtiStcMemoryWrites"
+             underscore_name="gti_stc_memory_writes"
+             description="The total number of GTI memory writes from Stencil Cache."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
+             equation="B 6 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="GtiRczMemoryWrites"
+             symbol_name="GtiRczMemoryWrites"
+             underscore_name="gti_rcz_memory_writes"
+             description="The total number of GTI memory writes from Render Depth Cache."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
+             equation="B 7 READ"
+             mdapi_group="GTI/Depth Cache"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="GtiMemoryWrites"
+             symbol_name="GtiMemoryWrites"
+             underscore_name="gti_memory_writes"
+             description="The total number of GTI memory writes."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="GtiL3Bank0Writes"
+             symbol_name="GtiL3Bank0Writes"
+             underscore_name="gti_l3_bank0_writes"
+             description="The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations)."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 4 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GtiRczMemoryWrites"
-             description="The total number of GTI memory writes from Render Depth Cache."
+    <counter name="GtiL3Bank1Writes"
+             symbol_name="GtiL3Bank1Writes"
+             underscore_name="gti_l3_bank1_writes"
+             description="The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations)."
              data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="gti_rcz_memory_writes"
              units="messages"
-             symbol_name="GtiRczMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 5 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GtiL3Bank2Writes"
+             symbol_name="GtiL3Bank2Writes"
+             underscore_name="gti_l3_bank2_writes"
+             description="The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations)."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="C 6 READ"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="GtiL3Bank3Writes"
+             symbol_name="GtiL3Bank3Writes"
+             underscore_name="gti_l3_bank3_writes"
+             description="The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations)."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="C 7 READ"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="GtiHizMemoryWrites"
-             description="The total number of GTI memory writes from Hierarchical Depth Cache."
+    <counter name="GtiL3Writes"
+             symbol_name="GtiL3Writes"
+             underscore_name="gti_l3_writes"
+             description="The total number of GTI memory writes from L3 (L3 invalidations)."
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="gti_hiz_memory_writes"
              units="messages"
-             symbol_name="GtiHizMemoryWrites"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="$GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GTI/Depth Cache"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GtiRingAccesses"
+             symbol_name="GtiRingAccesses"
+             underscore_name="gti_ring_accesses"
+             description="The total number of all GTI accesses to the ring."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="C 3 READ 2 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics Extended Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extended"
-       hw_config_guid="753972d4-87cd-4460-824d-754463ac5054"
        chipset="SKLGT4"
        symbol_name="ComputeExtended"
+       underscore_name="compute_extended"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="753972d4-87cd-4460-824d-754463ac5054"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Typed Writes 0"
-             description="The subslice 0 typed writes."
-             data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="typed_writes0"
-             units="messages"
-             symbol_name="TypedWrites0"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="EuTypedAtomics0"
-             description="The subslice 0 EU Typed Atomics subslice 0."
-             data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="eu_typed_atomics0"
-             units="messages"
-             symbol_name="EuTypedAtomics0"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="Typed Atomics 0"
-             description="The subslice 0 typed atomics."
-             data_type="uint64"
-             equation="C 4 READ"
-             underscore_name="typed_atomics0"
-             units="messages"
-             symbol_name="TypedAtomics0"
-             semantic_type="event"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="TypedAtomicsPerCacheLine"
-             description="The ratio of EU typed atomics requests to L3 cache line writes."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
-             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
-             underscore_name="typed_atomics_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedAtomicsPerCacheLine"
-             semantic_type="ratio"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuUntypedReads0"
-             description="The subslice 0 EU Untyped Reads subslice 0."
-             data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="eu_untyped_reads0"
-             units="messages"
-             symbol_name="EuUntypedReads0"
-             semantic_type="event"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Untyped Writes 0"
-             description="The subslice 0 untyped writes (including SLM writes)."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="untyped_writes0"
-             units="messages"
-             symbol_name="UntypedWrites0"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuUntypedAtomics0"
-             description="The subslice 0 EU Untyped Atomics subslice 0."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="eu_untyped_atomics0"
-             units="messages"
-             symbol_name="EuUntypedAtomics0"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EuUntypedWrites0"
-             description="The subslice 0 EU Untyped Writes subslice 0."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="eu_untyped_writes0"
-             units="messages"
-             symbol_name="EuUntypedWrites0"
-             semantic_type="event"
-             mdapi_supported_apis=""
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="EuA64UntypedWrites0"
-             description="The subslice 0 EU A64 Untyped Writes subslice 0."
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="eu_a64_untyped_writes0"
-             units="messages"
-             symbol_name="EuA64UntypedWrites0"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="UntypedWritesPerCacheLine"
-             description="The ratio of EU untyped write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
-             underscore_name="untyped_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
              units="messages"
-             symbol_name="ShaderBarriers"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="EuUntypedReads0"
+             symbol_name="EuUntypedReads0"
+             underscore_name="eu_untyped_reads0"
+             description="The subslice 0 EU Untyped Reads subslice 0."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="B 0 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="EuTypedReads0"
+             symbol_name="EuTypedReads0"
+             underscore_name="eu_typed_reads0"
+             description="The subslice 0 EU Typed Reads subslice 0."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="EuUntypedWrites0"
+             symbol_name="EuUntypedWrites0"
+             underscore_name="eu_untyped_writes0"
+             description="The subslice 0 EU Untyped Writes subslice 0."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="messages"
+             semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="EuTypedWrites0"
+             symbol_name="EuTypedWrites0"
+             underscore_name="eu_typed_writes0"
              description="The subslice 0 EU Typed Writes subslice 0."
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="eu_typed_writes0"
              units="messages"
-             symbol_name="EuTypedWrites0"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="subslice"
+             equation="B 3 READ"
              mdapi_group="L3/Data Port"
-             />
-    <counter name="TypedWritesPerCacheLine"
-             description="The ratio of EU typed write requests to L3 cache line writes."
-             data_type="float"
-             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
-             underscore_name="typed_writes_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="TypedWritesPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Typed Reads 0"
-             description="The subslice 0 typed reads."
+    <counter name="EuUntypedAtomics0"
+             symbol_name="EuUntypedAtomics0"
+             underscore_name="eu_untyped_atomics0"
+             description="The subslice 0 EU Untyped Atomics subslice 0."
              data_type="uint64"
-             equation="C 2 READ"
-             underscore_name="typed_reads0"
              units="messages"
-             symbol_name="TypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Untyped Reads 0"
-             description="The subslice 0 untyped reads (including SLM reads)."
+    <counter name="EuTypedAtomics0"
+             symbol_name="EuTypedAtomics0"
+             underscore_name="eu_typed_atomics0"
+             description="The subslice 0 EU Typed Atomics subslice 0."
              data_type="uint64"
-             equation="C 3 READ"
-             underscore_name="untyped_reads0"
              units="messages"
-             symbol_name="UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
     <counter name="EuA64UntypedReads0"
+             symbol_name="EuA64UntypedReads0"
+             underscore_name="eu_a64_untyped_reads0"
              description="The subslice 0 EU A64 Untyped Reads subslice 0."
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="eu_a64_untyped_reads0"
              units="messages"
-             symbol_name="EuA64UntypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
+             />
+    <counter name="EuA64UntypedWrites0"
+             symbol_name="EuA64UntypedWrites0"
+             underscore_name="eu_a64_untyped_writes0"
+             description="The subslice 0 EU A64 Untyped Writes subslice 0."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="B 7 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
+    <counter name="Typed Reads 0"
+             symbol_name="TypedReads0"
+             underscore_name="typed_reads0"
+             description="The subslice 0 typed reads."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 2 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Typed Writes 0"
+             symbol_name="TypedWrites0"
+             underscore_name="typed_writes0"
+             description="The subslice 0 typed writes."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 0 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EuTypedReads0"
-             description="The subslice 0 EU Typed Reads subslice 0."
+    <counter name="Untyped Reads 0"
+             symbol_name="UntypedReads0"
+             underscore_name="untyped_reads0"
+             description="The subslice 0 untyped reads (including SLM reads)."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="eu_typed_reads0"
              units="messages"
-             symbol_name="EuTypedReads0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 3 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="UntypedReadsPerCacheLine"
-             description="The ratio of EU untyped read requests to L3 cache line reads."
-             data_type="float"
-             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
-             underscore_name="untyped_reads_per_cache_line"
-             units="eu sends to l3 cache lines"
-             symbol_name="UntypedReadsPerCacheLine"
-             semantic_type="ratio"
-             mdapi_supported_apis=""
+    <counter name="Untyped Writes 0"
+             symbol_name="UntypedWrites0"
+             underscore_name="untyped_writes0"
+             description="The subslice 0 untyped writes (including SLM writes)."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="C 1 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Typed Atomics 0"
+             symbol_name="TypedAtomics0"
+             underscore_name="typed_atomics0"
+             description="The subslice 0 typed atomics."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
              units="messages"
-             symbol_name="ShaderMemoryAccesses"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="C 4 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="TypedReadsPerCacheLine"
+             symbol_name="TypedReadsPerCacheLine"
+             underscore_name="typed_reads_per_cache_line"
              description="The ratio of EU typed read requests to L3 cache line reads."
              data_type="float"
-             equation="$EuTypedReads0 $TypedReads0 FDIV"
-             underscore_name="typed_reads_per_cache_line"
              units="eu sends to l3 cache lines"
-             symbol_name="TypedReadsPerCacheLine"
              semantic_type="ratio"
-             mdapi_supported_apis=""
+             equation="$EuTypedReads0 $TypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+    <counter name="TypedWritesPerCacheLine"
+             symbol_name="TypedWritesPerCacheLine"
+             underscore_name="typed_writes_per_cache_line"
+             description="The ratio of EU typed write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedWrites0 $TypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="UntypedReadsPerCacheLine"
+             symbol_name="UntypedReadsPerCacheLine"
+             underscore_name="untyped_reads_per_cache_line"
+             description="The ratio of EU untyped read requests to L3 cache line reads."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="UntypedWritesPerCacheLine"
+             symbol_name="UntypedWritesPerCacheLine"
+             underscore_name="untyped_writes_per_cache_line"
+             description="The ratio of EU untyped write requests to L3 cache line writes."
+             data_type="float"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="TypedAtomicsPerCacheLine"
+             symbol_name="TypedAtomicsPerCacheLine"
+             underscore_name="typed_atomics_per_cache_line"
+             description="The ratio of EU typed atomics requests to L3 cache line writes."
              data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
+             units="eu sends to l3 cache lines"
+             semantic_type="ratio"
+             equation="$EuTypedAtomics0 $TypedAtomics0 FDIV"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Compute Metrics L3 Cache Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_l3_cache"
-       hw_config_guid="4e4392e9-8f73-457b-ab44-b49f7a0c733b"
        chipset="SKLGT4"
        symbol_name="ComputeL3Cache"
+       underscore_name="compute_l3_cache"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="4e4392e9-8f73-457b-ab44-b49f7a0c733b"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Bank 03 Accesses"
-             description="The total number of accesses to L3 Bank 03."
-             data_type="uint64"
-             equation="B 3 READ 2 UMUL"
-             underscore_name="l3_bank03_accesses"
-             units="messages"
-             symbol_name="L3Bank03Accesses"
-             availability="$SliceMask 0x01 AND"
-             semantic_type="event"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Accesses"
-             description="The total number of L3 accesses from all entities."
-             data_type="uint64"
-             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
-             underscore_name="l3_accesses"
-             units="messages"
-             symbol_name="L3Accesses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3"
              />
     <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
              description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Sampler Throughput"
-             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="A 29 READ 64 UMUL"
-             underscore_name="l3_sampler_throughput"
-             units="bytes"
-             symbol_name="L3SamplerThroughput"
-             semantic_type="throughput"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Sampler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU0 Pipe Active"
+             symbol_name="Fpu0Active"
+             underscore_name="fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu0_active"
              units="percent"
-             symbol_name="Fpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
              description="The average rate of IPC calculated for 2 FPU pipelines."
              data_type="float"
              max_equation="2"
-             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
              units="number"
-             symbol_name="EuAvgIpcRate"
              semantic_type="ratio"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             equation="A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu0_instruction"
              units="percent"
-             symbol_name="EuBinaryFpu0Instruction"
              semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU0 Hybrid Instruction"
+             symbol_name="EuHybridFpu0Instruction"
+             underscore_name="eu_hybrid_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU1 Hybrid Instruction"
+             symbol_name="EuHybridFpu1Instruction"
+             underscore_name="eu_hybrid_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="EU FPU0 Ternary Instruction"
+             symbol_name="EuTernaryFpu0Instruction"
+             underscore_name="eu_ternary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
+    <counter name="EU FPU1 Ternary Instruction"
+             symbol_name="EuTernaryFpu1Instruction"
+             underscore_name="eu_ternary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU FPU0 Binary Instruction"
+             symbol_name="EuBinaryFpu0Instruction"
+             underscore_name="eu_binary_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU0."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU FPU1 Binary Instruction"
+             symbol_name="EuBinaryFpu1Instruction"
+             underscore_name="eu_binary_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="EU FPU0 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU0."
+    <counter name="EU FPU0 Move Instruction"
+             symbol_name="EuMoveFpu0Instruction"
+             underscore_name="eu_move_fpu0_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu0_instruction"
              units="percent"
-             symbol_name="EuHybridFpu0Instruction"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU1 Move Instruction"
+             symbol_name="EuMoveFpu1Instruction"
+             underscore_name="eu_move_fpu1_instruction"
+             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes/Instructions"
+             mdapi_usage_flags="Tier4 System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Misses"
-             description="The total number of L3 misses."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="C 4 READ C 5 READ UADD"
-             underscore_name="l3_misses"
-             units="messages"
-             symbol_name="L3Misses"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/TAG"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="L3 Bank 00 Accesses"
-             description="The total number of accesses to L3 Bank 00."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="C 0 READ 2 UMUL"
-             underscore_name="l3_bank00_accesses"
-             units="messages"
-             symbol_name="L3Bank00Accesses"
-             availability="$SliceMask 0x01 AND"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
-             />
-    <counter name="EU FPU0 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu0_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu0Instruction"
-             semantic_type="duration"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU FPU1 Hybrid Instruction"
-             description="The percentage of time in which execution units were actively processing hybrid instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_hybrid_fpu1_instruction"
-             units="percent"
-             symbol_name="EuHybridFpu1Instruction"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Accesses"
+             symbol_name="SamplerAccesses"
+             underscore_name="sampler_accesses"
+             description="The total number of messages send to samplers."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="messages"
              semantic_type="event"
+             equation="A 28 READ"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="C 6 READ 64 UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI L3 Throughput"
-             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="C 4 READ C 5 READ UADD 64 UMUL"
-             underscore_name="gti_l3_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiL3Throughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="GTI/L3"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Accesses"
-             description="The total number of accesses to L3 Bank 00 from IC cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
-             underscore_name="l3_bank00_ic_accesses"
              units="messages"
-             symbol_name="L3Bank00IcAccesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Bank 00 IC Hits"
-             description="The total number of hits in L3 Bank 00 from IC cache."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
-             underscore_name="l3_bank00_ic_hits"
              units="messages"
-             symbol_name="L3Bank00IcHits"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3/IC"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Accesses"
-             description="The total number of messages send to samplers."
+    <counter name="L3 Accesses"
+             symbol_name="L3Accesses"
+             underscore_name="l3_accesses"
+             description="The total number of L3 accesses from all entities."
              data_type="uint64"
-             equation="A 28 READ"
-             underscore_name="sampler_accesses"
              units="messages"
-             symbol_name="SamplerAccesses"
              semantic_type="event"
+             equation="C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler"
              />
-    <counter name="L3 Bank 01 Accesses"
-             description="The total number of accesses to L3 Bank 01."
+    <counter name="L3 Misses"
+             symbol_name="L3Misses"
+             underscore_name="l3_misses"
+             description="The total number of L3 misses."
              data_type="uint64"
-             equation="C 1 READ 2 UMUL"
-             underscore_name="l3_bank01_accesses"
              units="messages"
-             symbol_name="L3Bank01Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
+             equation="C 4 READ C 5 READ UADD"
+             mdapi_group="L3/TAG"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
+    <counter name="L3 Sampler Throughput"
+             symbol_name="L3SamplerThroughput"
+             underscore_name="l3_sampler_throughput"
+             description="The total number of GPU memory bytes transferred between samplers and L3 caches."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 29 READ 64 UMUL"
+             mdapi_group="L3/Sampler"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU0 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU0."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu0_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu0Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="L3 Total Throughput"
+             symbol_name="L3TotalThroughput"
+             underscore_name="l3_total_throughput"
+             description="The total number of GPU memory bytes transferred via L3."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="$L3Accesses 64 UMUL"
+             mdapi_group="L3"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="L3 Bank 00 Accesses"
+             symbol_name="L3Bank00Accesses"
+             underscore_name="l3_bank00_accesses"
+             description="The total number of accesses to L3 Bank 00."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             equation="C 0 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="L3 Bank 01 Accesses"
+             symbol_name="L3Bank01Accesses"
+             underscore_name="l3_bank01_accesses"
+             description="The total number of accesses to L3 Bank 01."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU FPU1 Move Instruction"
-             description="The percentage of time in which execution units were actively processing move instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_move_fpu1_instruction"
-             units="percent"
-             symbol_name="EuMoveFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             equation="C 1 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="L3 Bank 02 Accesses"
+             symbol_name="L3Bank02Accesses"
+             underscore_name="l3_bank02_accesses"
+             description="The total number of accesses to L3 Bank 02."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             equation="B 2 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="L3 Bank 03 Accesses"
+             symbol_name="L3Bank03Accesses"
+             underscore_name="l3_bank03_accesses"
+             description="The total number of accesses to L3 Bank 03."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             equation="B 3 READ 2 UMUL"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Bank 02 Accesses"
-             description="The total number of accesses to L3 Bank 02."
+    <counter name="L3 Bank 00 IC Accesses"
+             symbol_name="L3Bank00IcAccesses"
+             underscore_name="l3_bank00_ic_accesses"
+             description="The total number of accesses to L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="B 2 READ 2 UMUL"
-             underscore_name="l3_bank02_accesses"
              units="messages"
-             symbol_name="L3Bank02Accesses"
-             availability="$SliceMask 0x01 AND"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="slice"
-             mdapi_group="L3"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="L3 Bank 00 IC Hits"
+             symbol_name="L3Bank00IcHits"
+             underscore_name="l3_bank00_ic_hits"
+             description="The total number of hits in L3 Bank 00 from IC cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             equation="B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN"
+             availability="$SliceMask 0x01 AND"
+             mdapi_group="L3/IC"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="L3 Total Throughput"
-             description="The total number of GPU memory bytes transferred via L3."
+    <counter name="GTI L3 Throughput"
+             symbol_name="GtiL3Throughput"
+             underscore_name="gti_l3_throughput"
+             description="The total number of GPU memory bytes transferred between L3 caches and GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSlicesTotalCount UMUL"
-             equation="$L3Accesses 64 UMUL"
-             underscore_name="l3_total_throughput"
              units="bytes"
-             symbol_name="L3TotalThroughput"
              semantic_type="throughput"
+             equation="C 4 READ C 5 READ UADD 64 UMUL"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 32 UMUL"
-             equation="C 7 READ 64 UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="C 6 READ 64 UMUL"
              mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 32 UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="EU FPU1 Binary Instruction"
-             description="The percentage of time in which execution units were actively processing binary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_binary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuBinaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU FPU1 Ternary Instruction"
-             description="The percentage of time in which execution units were actively processing ternary instructions on FPU1."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_ternary_fpu1_instruction"
-             units="percent"
-             symbol_name="EuTernaryFpu1Instruction"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes/Instructions"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
-             units="percent"
-             symbol_name="EuSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             equation="C 7 READ 64 UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="730d95dd-7da8-4e1c-ab8d-c0eb1e4c1805"
        chipset="SKLGT4"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="730d95dd-7da8-4e1c-ab8d-c0eb1e4c1805"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="HDC stalled by L3 (s0.ss1)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="HDC stalled by L3 (s0.ss2)"
-             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="HDC stalled by L3 (s0.ss0)"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="HDC stalled by L3 (s0.ss1)"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+    <counter name="HDC stalled by L3 (s0.ss2)"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="d9e86d70-462b-462a-851e-fd63e8c13d63"
        chipset="SKLGT4"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="d9e86d70-462b-462a-851e-fd63e8c13d63"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank1 Active"
-             description="The percentage of time in which slice0 L3 bank1 is active"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_active"
-             units="percent"
-             symbol_name="L30Bank1Active"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="Slice0 L3 Bank1 Stalled"
-             description="The percentage of time in which slice0 L3 bank1 is stalled"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_stalled"
-             units="percent"
-             symbol_name="L30Bank1Stalled"
-             availability="$SliceMask 0x1 AND"
+             units="percent"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Active"
-             description="The percentage of time in which slice0 L3 bank0 is active"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_active"
              units="percent"
-             symbol_name="L30Bank0Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Stalled"
-             description="The percentage of time in which slice0 L3 bank0 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_stalled"
-             units="percent"
-             symbol_name="L30Bank0Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank0 Stalled"
+             symbol_name="L30Bank0Stalled"
+             underscore_name="l30_bank0_stalled"
+             description="The percentage of time in which slice0 L3 bank0 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Stalled"
+             symbol_name="L30Bank1Stalled"
+             underscore_name="l30_bank1_stalled"
+             description="The percentage of time in which slice0 L3 bank1 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank1 Active"
+             symbol_name="L30Bank1Active"
+             underscore_name="l30_bank1_active"
+             description="The percentage of time in which slice0 L3 bank1 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank0 Active"
+             symbol_name="L30Bank0Active"
+             underscore_name="l30_bank0_active"
+             description="The percentage of time in which slice0 L3 bank0 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="52200424-6ee9-48b3-b7fa-0afcf1975e4d"
        chipset="SKLGT4"
        symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="52200424-6ee9-48b3-b7fa-0afcf1975e4d"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Stalled"
-             description="The percentage of time in which slice0 L3 bank2 is stalled"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_stalled"
              units="percent"
-             symbol_name="L30Bank2Stalled"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
              description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank2 Active"
-             description="The percentage of time in which slice0 L3 bank2 is active"
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_active"
              units="percent"
-             symbol_name="L30Bank2Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank2 Stalled"
+             symbol_name="L30Bank2Stalled"
+             underscore_name="l30_bank2_stalled"
+             description="The percentage of time in which slice0 L3 bank2 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank2 Active"
+             symbol_name="L30Bank2Active"
+             underscore_name="l30_bank2_active"
+             description="The percentage of time in which slice0 L3 bank2 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set L3_3"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="1988315f-0a26-44df-acb0-df7ec86b1456"
        chipset="SKLGT4"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="1988315f-0a26-44df-acb0-df7ec86b1456"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 6 READ"
              mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 L3 Bank3 Stalled"
-             description="The percentage of time in which slice0 L3 bank3 is stalled"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_stalled"
-             units="percent"
-             symbol_name="L30Bank3Stalled"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
-             data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank3 Active"
-             description="The percentage of time in which slice0 L3 bank3 is active"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_active"
              units="percent"
-             symbol_name="L30Bank3Active"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 30 READ 64 UMUL"
              mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
     <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
              description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             />
+    <counter name="Slice0 L3 Bank3 Stalled"
+             symbol_name="L30Bank3Stalled"
+             underscore_name="l30_bank3_stalled"
+             description="The percentage of time in which slice0 L3 bank3 is stalled"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Active"
+             symbol_name="L30Bank3Active"
+             underscore_name="l30_bank3_active"
+             description="The percentage of time in which slice0 L3 bank3 is active"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="f1f17ca7-286e-4ae5-9d15-9fccad6c665d"
        chipset="SKLGT4"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="f1f17ca7-286e-4ae5-9d15-9fccad6c665d"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="Slice0 Pixel Values Ready"
-             description="The percentage of time in which slice0 pixel values are ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values0_ready"
-             units="percent"
-             symbol_name="PixelValues0Ready"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
              units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data0_ready"
              units="percent"
-             symbol_name="PixelData0Ready"
-             availability="$SliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
-             units="percent"
-             symbol_name="VsFpu0Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
-             units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 0x1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 PS Output Available"
-             description="The percentage of time in which slice0 PS output is available"
+    <counter name="Slice0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData0Ready"
+             underscore_name="pixel_data0_ready"
+             description="The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output0_available"
              units="percent"
-             symbol_name="PSOutput0Available"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 PS Output Available"
+             symbol_name="PSOutput0Available"
+             underscore_name="ps_output0_available"
+             description="The percentage of time in which slice0 PS output is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Pixel Values Ready"
+             symbol_name="PixelValues0Ready"
+             underscore_name="pixel_values0_ready"
+             description="The percentage of time in which slice0 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 0x1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set Sampler"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="sampler"
-       hw_config_guid="00a9e0fb-3d2e-4405-852c-dce6334ffb3b"
        chipset="SKLGT4"
        symbol_name="Sampler"
+       underscore_name="sampler"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="00a9e0fb-3d2e-4405-852c-dce6334ffb3b"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice2 Input Available"
-             description="The percentage of time in which slice0 subslice2 sampler input is available"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
              units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
              units="percent"
-             symbol_name="EuFpuBothActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
              description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Subslice0 Input Available"
-             description="The percentage of time in which slice0 subslice0 sampler input is available"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
              units="percent"
-             symbol_name="VsFpu1Active"
              semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
     <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
              description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
              units="pixels"
-             symbol_name="HiDepthTestFails"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Slice0 Subslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 subslice2 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
-             units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Slice0 Subslice1 Input Available"
-             description="The percentage of time in which slice0 subslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
              description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
              units="texels"
-             symbol_name="SamplerTexels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
-             units="percent"
-             symbol_name="PsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
-             units="percent"
-             symbol_name="PsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="Slice0 Subslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 subslice1 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Slice0 Subslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 subslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 Subslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 subslice0 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
              units="percent"
-             symbol_name="PsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Subslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 subslice2 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
              description="The percentage of time in which slice0 subslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
              units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 Subslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
              description="The percentage of time in which slice0 subslice1 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
              units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
+             mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set TDL_1"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="13dcc50a-7ec0-409b-99d6-a3f932cedcb3"
        chipset="SKLGT4"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="13dcc50a-7ec0-409b-99d6-a3f932cedcb3"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
-             units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$SubsliceMask 0x4 AND"
-             semantic_type="duration"
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
              units="messages"
-             symbol_name="ShaderAtomics"
              semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
              description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x2 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice0"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="NonPS Thread Ready For Dispatch on Slice0 Subslice2"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="PS Thread Ready For Dispatch on Slice0 Subslice1"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TDL_2"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="97875e21-6624-4aee-9191-682feb3eae21"
        chipset="SKLGT4"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="97875e21-6624-4aee-9191-682feb3eae21"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
+             units="ns"
              semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu1_active"
-             units="percent"
-             symbol_name="VsFpu1Active"
-             semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
+             units="cycles"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
              description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$SubsliceMask 0x2 AND"
              semantic_type="duration"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$SubsliceMask 0x2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+    <counter name="VS FPU0 Pipe Active"
+             symbol_name="VsFpu0Active"
+             underscore_name="vs_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS FPU1 Pipe Active"
-             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
+    <counter name="VS FPU1 Pipe Active"
+             symbol_name="VsFpu1Active"
+             underscore_name="vs_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu1_active"
              units="percent"
-             symbol_name="PsFpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction."
+    <counter name="PS FPU0 Pipe Active"
+             symbol_name="PsFpu0Active"
+             underscore_name="ps_fpu0_active"
+             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
              data_type="float"
-             max_equation="100"
-             equation="A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu0_active"
+             max_equation="100"
              units="percent"
-             symbol_name="VsFpu0Active"
              semantic_type="duration"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="PS FPU1 Pipe Active"
+             symbol_name="PsFpu1Active"
+             underscore_name="ps_fpu1_active"
+             description="The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
+             equation="A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$SubsliceMask 0x4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="subslice"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
              description="The total number of rasterized pixels."
              data_type="uint64"
+             units="pixels"
+             semantic_type="event"
              equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="PS FPU0 Pipe Active"
-             description="The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu0_active"
-             units="percent"
-             symbol_name="PsFpu0Active"
-             semantic_type="duration"
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
              description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
              units="pixels"
-             symbol_name="SamplesWritten"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 26 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
              description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis="OGL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
+             units="texels"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis="OGL4 IO BB"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
              description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis="OGL4 OCL IO BB"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis="OGL OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
              description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="OGL4 OCL IO BB"
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             equation="A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_supported_apis="OGL4 OCL IO BB"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis="OGL IO BB"
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
-             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$SubsliceMask 0x1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
-             mdapi_supported_apis="OGL OCL IO BB"
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
-             />
     <counter name="Thread Header Ready on Slice0 Subslice0 Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
+             availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice0 Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
+             semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              availability="$SubsliceMask 0x1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice2 Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Thread Header Ready on Slice0 Subslice1 Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SubsliceMask 0x2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Compute Metrics Extra Gen9"
-       mdapi_supported_apis="OGL4 OCL IO BB"
-       underscore_name="compute_extra"
-       hw_config_guid="a5aa857d-e8f0-4dfa-8981-ce340fa748fd"
        chipset="SKLGT4"
        symbol_name="ComputeExtra"
+       underscore_name="compute_extra"
+       mdapi_supported_apis="OGL4 OCL IO BB"
+       hw_config_guid="a5aa857d-e8f0-4dfa-8981-ce340fa748fd"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU FPU1 Pipe Active"
+             symbol_name="Fpu1Active"
+             underscore_name="fpu1_active"
              description="The percentage of time in which EU FPU1 pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu1_active"
              units="percent"
-             symbol_name="Fpu1Active"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009888" value="0x0D518000" />
         <register type="NOA" address="0x00009888" value="0x0D521980" />
         <register type="NOA" address="0x00009888" value="0x01520000" />
-        <register type="NOA" address="0x00009888" value="0x0D534000" />
-        <register type="NOA" address="0x00009888" value="0x1190FF80" />
-        <register type="NOA" address="0x00009888" value="0x57900000" />
-        <register type="NOA" address="0x00009888" value="0x49900C00" />
-        <register type="NOA" address="0x00009888" value="0x37900000" />
-        <register type="NOA" address="0x00009888" value="0x33900000" />
-        <register type="NOA" address="0x00009888" value="0x4B900002" />
-        <register type="NOA" address="0x00009888" value="0x59900000" />
-        <register type="NOA" address="0x00009888" value="0x51901100" />
-        <register type="NOA" address="0x00009888" value="0x41901000" />
-        <register type="NOA" address="0x00009888" value="0x43901423" />
-        <register type="NOA" address="0x00009888" value="0x53903331" />
-        <register type="NOA" address="0x00009888" value="0x45900044" />
-    </register_config>
-  </set>
-
-  <set name="Media Vme Pipe Gen9"
-       mdapi_supported_apis="MEDIA IO BB"
-       underscore_name="vme_pipe"
-       hw_config_guid="0e8d8b86-4ee7-4cdd-aaaa-58adc92cb29e"
-       chipset="SKLGT4"
-       symbol_name="VMEPipe"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="EU Both FPU Pipes Active"
-             description="The percentage of time in which both EU FPU pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_both_active"
-             units="percent"
-             symbol_name="EuFpuBothActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
+        <register type="NOA" address="0x00009888" value="0x0D534000" />
+        <register type="NOA" address="0x00009888" value="0x1190FF80" />
+        <register type="NOA" address="0x00009888" value="0x57900000" />
+        <register type="NOA" address="0x00009888" value="0x49900C00" />
+        <register type="NOA" address="0x00009888" value="0x37900000" />
+        <register type="NOA" address="0x00009888" value="0x33900000" />
+        <register type="NOA" address="0x00009888" value="0x4B900002" />
+        <register type="NOA" address="0x00009888" value="0x59900000" />
+        <register type="NOA" address="0x00009888" value="0x51901100" />
+        <register type="NOA" address="0x00009888" value="0x41901000" />
+        <register type="NOA" address="0x00009888" value="0x43901423" />
+        <register type="NOA" address="0x00009888" value="0x53903331" />
+        <register type="NOA" address="0x00009888" value="0x45900044" />
+    </register_config>
+  </set>
+
+  <set name="Media Vme Pipe Gen9"
+       chipset="SKLGT4"
+       symbol_name="VMEPipe"
+       underscore_name="vme_pipe"
+       mdapi_supported_apis="MEDIA IO BB"
+       hw_config_guid="0e8d8b86-4ee7-4cdd-aaaa-58adc92cb29e"
+       >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
+             units="percent"
+             semantic_type="duration"
              equation="A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Both FPU Pipes Active"
+             symbol_name="EuFpuBothActive"
+             underscore_name="eu_fpu_both_active"
+             description="The percentage of time in which both EU FPU pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VME Busy"
-             description="The percentage of time in which VME (IME or CRE) was actively processing data."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vme_busy"
              units="percent"
-             symbol_name="VMEBusy"
              semantic_type="duration"
+             equation="8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Overview System Batch Tier2"
-             mdapi_group="VME Pipe"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VME Busy"
+             symbol_name="VMEBusy"
+             underscore_name="vme_busy"
+             description="The percentage of time in which VME (IME or CRE) was actively processing data."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="VME Pipe"
+             mdapi_usage_flags="Overview System Batch Tier2"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gpu Rings Busyness"
-       mdapi_supported_apis="OGL OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="4e5b1599-5b01-4b3d-89fa-6b26a25fe02b"
        chipset="SKLGT4"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OCL MEDIA IO BB"
+       hw_config_guid="4e5b1599-5b01-4b3d-89fa-6b26a25fe02b"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
-             units="percent"
-             symbol_name="VeboxBusy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
              description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AnyRingBusy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_ring_busy"
              units="percent"
-             symbol_name="AnyRingBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="AnyRingBusy"
+             symbol_name="AnyRingBusy"
+             underscore_name="any_ring_busy"
+             description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="MDAPI testing set Gen9"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="882fa433-1f4a-4a67-a962-c741888fe5f5"
        chipset="SKLGT4"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="882fa433-1f4a-4a67-a962-c741888fe5f5"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1."
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 7 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter4"
-             description="HW test counter 4. Factor: 0.333"
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter5"
-             description="HW test counter 5. Factor: 0.333"
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter6"
-             description="HW test counter 6. Factor: 0.166"
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 3 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
+    <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
+             description="HW test counter 4. Factor: 0.333"
              data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
              units="events"
-             symbol_name="Counter3"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
+             description="HW test counter 5. Factor: 0.333"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
+             description="HW test counter 6. Factor: 0.166"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 6 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.666"
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1."
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="C 7 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00009840" value="0x00000080" />
   </set>
 
   <set name="Metric set PMA Stall"
-       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
-       underscore_name="pma__stall"
-       hw_config_guid="befe9fd6-474e-4a3d-b98e-cd793715cf91"
        chipset="SKLGT4"
        symbol_name="PMA_Stall"
+       underscore_name="pma__stall"
+       mdapi_supported_apis="OGL OGL4 OCL IO MEDIA"
+       hw_config_guid="befe9fd6-474e-4a3d-b98e-cd793715cf91"
        >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="STC PMA stall"
+             symbol_name="StcPMAStall"
+             underscore_name="stc_pma_stall"
              description="Percentage of time when stencil cache line and an overlapping pixel are causing stalls"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="stc_pma_stall"
              units="percent"
-             symbol_name="StcPMAStall"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GPU/Stencil Cache"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
index 0420c0f..c3b1e9e 100644 (file)
 <?xml version="1.0"?>
 <metrics version="1568234084" merge_md5="">
   <set name="Render Metrics Basic Gen12"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_basic"
-       hw_config_guid="519a832e-a682-4ef6-a7ac-b12d68116fd7"
        chipset="TGL"
        symbol_name="RenderBasic"
+       underscore_name="render_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="519a832e-a682-4ef6-a7ac-b12d68116fd7"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_em_active"
-             units="percent"
-             symbol_name="VsEmActive"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
              units="percent"
-             symbol_name="PsEuBothFpuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="Samplers Busy"
+             symbol_name="SamplersBusy"
+             underscore_name="samplers_busy"
              description="The percentage of time in which samplers have been processing EU requests."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ GPU_CLOCK 0 READ FDIV"
-             underscore_name="samplers_busy"
              units="percent"
-             symbol_name="SamplersBusy"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="C 7 READ GPU_CLOCK 0 READ FDIV"
+             availability="$DualSubsliceMask 1 AND"
              mdapi_group="Sampler"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="PS EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction."
+    <counter name="Samplers Bottleneck"
+             symbol_name="SamplerBottleneck"
+             underscore_name="sampler_bottleneck"
+             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="2 A 16 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_em_active"
              units="percent"
-             symbol_name="PsEmActive"
              semantic_type="duration"
+             equation="C 6 READ GPU_CLOCK 0 READ FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="Sampler"
+             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="PS FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 15 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu_active"
-             units="percent"
-             symbol_name="PsFpuActive"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu_active"
-             units="percent"
-             symbol_name="VsFpuActive"
-             semantic_type="duration"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
              semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 20 READ A 6 READ UDIV"
-             underscore_name="ps_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="PsEuStallPerThread"
+             units="messages"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
              description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  C 5 READ C 4 READ UADD C 3 READ UADD C 2 READ UADD UMUL"
-             underscore_name="gti_read_throughput"
              units="bytes"
-             symbol_name="GtiReadThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="IO"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="64  C 5 READ C 4 READ UADD C 3 READ UADD C 2 READ UADD UMUL"
              mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="IO"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS AVG Stall per Thread"
-             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
              data_type="uint64"
-             equation="A 14 READ A 1 READ UDIV"
-             underscore_name="vs_eu_stall_per_thread"
-             units="cycles"
-             symbol_name="VsEuStallPerThread"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  C 1 READ C 0 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="IO"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="2 A 17 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="VS EM Pipe Active"
+             symbol_name="VsEmActive"
+             underscore_name="vs_em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="FS AVG Active per Thread"
-             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
-             data_type="uint64"
-             equation="A 19 READ A 6 READ UDIV"
-             underscore_name="ps_eu_active_per_thread"
-             units="cycles"
-             symbol_name="PsEuActivePerThread"
-             semantic_type="event"
-             mdapi_supported_apis=""
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="FS EU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the EUs."
+    <counter name="PS EM Pipe Active"
+             symbol_name="PsEmActive"
+             underscore_name="ps_em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_active"
              units="percent"
-             symbol_name="PsEuActive"
              semantic_type="duration"
+             equation="2 A 16 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="PS FPU Pipe Active"
+             symbol_name="PsFpuActive"
+             underscore_name="ps_fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 15 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS FPU Pipe Active"
+             symbol_name="VsFpuActive"
+             underscore_name="vs_fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS AVG Stall per Thread"
+             symbol_name="PsEuStallPerThread"
+             underscore_name="ps_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
+             units="cycles"
              semantic_type="event"
+             equation="A 20 READ A 6 READ UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="VS AVG Stall per Thread"
+             symbol_name="VsEuStallPerThread"
+             underscore_name="vs_eu_stall_per_thread"
+             description="The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
+             units="cycles"
              semantic_type="event"
+             equation="A 14 READ A 1 READ UDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 17 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samplers Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which samplers have been slowing down the pipe when processing EU requests."
+    <counter name="FS AVG Active per Thread"
+             symbol_name="PsEuActivePerThread"
+             underscore_name="ps_eu_active_per_thread"
+             description="The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="A 19 READ A 6 READ UDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="FS EU Active"
+             symbol_name="PsEuActive"
+             underscore_name="ps_eu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the EUs."
              data_type="float"
-             high_watermark="15"
-             equation="C 6 READ GPU_CLOCK 0 READ FDIV"
              max_equation="100"
-             underscore_name="sampler_bottleneck"
              units="percent"
-             symbol_name="SamplerBottleneck"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
+             equation="A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Indicate System Frame Batch Draw"
-             mdapi_group="Sampler"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="FS EU Stall"
-             description="The percentage of time in which fragment shaders were stalled on the EUs."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 A 20 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_stall"
              units="percent"
-             symbol_name="PsEuStall"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="EU Array/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="FS EU Stall"
+             symbol_name="PsEuStall"
+             underscore_name="ps_eu_stall"
+             description="The percentage of time in which fragment shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="2 A 20 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="VS EU Active"
+             symbol_name="VsEuActive"
+             underscore_name="vs_eu_active"
              description="The percentage of time in which vertex shaders were processed actively on the EUs."
              data_type="float"
              max_equation="100"
-             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_active"
              units="percent"
-             symbol_name="VsEuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
     <counter name="VS EU Stall"
+             symbol_name="VsEuStall"
+             underscore_name="vs_eu_stall"
              description="The percentage of time in which vertex shaders were stalled on the EUs."
              data_type="float"
              max_equation="100"
-             equation="2 A 14 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_eu_stall"
              units="percent"
-             symbol_name="VsEuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="2 A 14 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Vertex Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
     <counter name="VS AVG Active per Thread"
+             symbol_name="VsEuActivePerThread"
+             underscore_name="vs_eu_active_per_thread"
              description="The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs."
              data_type="uint64"
-             equation="A 13 READ A 1 READ UDIV"
-             underscore_name="vs_eu_active_per_thread"
              units="cycles"
-             symbol_name="VsEuActivePerThread"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 13 READ A 1 READ UDIV"
              mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  C 1 READ C 0 READ UADD UMUL"
-             underscore_name="gti_write_throughput"
-             units="bytes"
-             symbol_name="GtiWriteThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="IO"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Compute Metrics Basic"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="compute_basic"
-       hw_config_guid="c46cfe86-469f-4499-8452-f44012b68dab"
        chipset="TGL"
        symbol_name="ComputeBasic"
+       underscore_name="compute_basic"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="c46cfe86-469f-4499-8452-f44012b68dab"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU AVG IPC Rate"
-             description="The average rate of IPC calculated for 2 FPU pipelines."
-             data_type="float"
-             max_equation="2"
-             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
-             underscore_name="eu_avg_ipc_rate"
-             units="number"
-             symbol_name="EuAvgIpcRate"
-             semantic_type="ratio"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 4 READ"
              mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
-             data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
-    <counter name="Pixels Failing Tests"
-             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
-             data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
-             units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="em_active"
              units="percent"
-             symbol_name="EmActive"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
-             data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
-             units="texels"
-             symbol_name="SamplerTexelMisses"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
-             />
-    <counter name="EU FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="fpu_active"
              units="percent"
-             symbol_name="FpuActive"
              semantic_type="duration"
+             equation="2 8 A 13 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
-             units="bytes"
-             symbol_name="SlmBytesRead"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GTI Read Throughput"
-             description="The total number of GPU memory bytes read from GTI."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD UMUL"
-             underscore_name="gti_read_throughput"
-             units="bytes"
-             symbol_name="GtiReadThroughput"
-             semantic_type="throughput"
-             mdapi_supported_apis="IO"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
-             data_type="float"
-             max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
-             units="percent"
-             symbol_name="GpuBusy"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
              units="pixels"
-             symbol_name="RasterizedPixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
-             units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
-             data_type="float"
-             max_equation="100"
-             equation="2 8 A 13 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
-             units="percent"
-             symbol_name="EuThreadOccupancy"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
+             description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
              units="pixels"
-             symbol_name="SamplesBlended"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
              units="pixels"
-             symbol_name="EarlyDepthTestFails"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="GTI Write Throughput"
-             description="The total number of GPU memory bytes written to GTI."
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL"
-             equation="64  C 3 READ C 2 READ UADD UMUL"
-             underscore_name="gti_write_throughput"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="GtiWriteThroughput"
              semantic_type="throughput"
-             mdapi_supported_apis="IO"
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
              units="bytes"
-             symbol_name="L3ShaderThroughput"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
              mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
              data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
+             units="messages"
              semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
              units="bytes"
-             symbol_name="SlmBytesWritten"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
              data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
+             units="messages"
              semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
+    <counter name="GTI Read Throughput"
+             symbol_name="GtiReadThroughput"
+             underscore_name="gti_read_throughput"
+             description="The total number of GPU memory bytes read from GTI."
              data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="IO"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GTI Write Throughput"
+             symbol_name="GtiWriteThroughput"
+             underscore_name="gti_write_throughput"
+             description="The total number of GPU memory bytes written to GTI."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="64  C 3 READ C 2 READ UADD UMUL"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis="IO"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU AVG IPC Rate"
+             symbol_name="EuAvgIpcRate"
+             underscore_name="eu_avg_ipc_rate"
+             description="The average rate of IPC calculated for 2 FPU pipelines."
+             data_type="float"
+             max_equation="2"
+             units="number"
+             semantic_type="ratio"
+             equation="A 9 READ A 10 READ A 11 READ FADD A 9 READ FSUB FDIV 1 FADD"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier4 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
-    <counter name="EU Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing."
+    <counter name="EM Pipe Active"
+             symbol_name="EmActive"
+             underscore_name="em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU Pipe Active"
+             symbol_name="FpuActive"
+             underscore_name="fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_send_active"
              units="percent"
-             symbol_name="EuSendActive"
              semantic_type="duration"
+             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Send Pipe Active"
+             symbol_name="EuSendActive"
+             underscore_name="eu_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Render Metrics for 3D Pipeline Profile"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="render_pipe_profile"
-       hw_config_guid="77ae98cf-9a9e-4e35-be85-597b09ffbe53"
        chipset="TGL"
        symbol_name="RenderPipeProfile"
+       underscore_name="render_pipe_profile"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="77ae98cf-9a9e-4e35-be85-597b09ffbe53"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Bottleneck"
-             low_watermark="10"
-             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="30"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="vs_bottleneck"
-             units="percent"
-             symbol_name="VsBottleneck"
-             semantic_type="duration"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Hi-Depth Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="hi_depth_bottleneck"
-             units="percent"
-             symbol_name="HiDepthBottleneck"
-             semantic_type="duration"
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="gs_bottleneck"
-             units="percent"
-             symbol_name="GsBottleneck"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 5 READ"
              mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
-             data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="BC Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
-             high_watermark="15"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="bc_bottleneck"
              units="percent"
-             symbol_name="BcBottleneck"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Stall"
-             description="The percentage of time in which hull stall pipeline stage was stalled."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="hs_stall"
              units="percent"
-             symbol_name="HsStall"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Sampler Texels Misses"
-             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
+             description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="texels"
              semantic_type="event"
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
+             mdapi_usage_flags="Tier3 Batch Frame Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
-             />
-    <counter name="VF Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="15"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="vf_bottleneck"
-             units="percent"
-             symbol_name="VfBottleneck"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Input Assembler"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
              description="The total number of GPU memory bytes read from shared local memory."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
              equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Strip-Fans Bottleneck"
-             low_watermark="5"
-             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
-             high_watermark="10"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             max_equation="100"
-             underscore_name="sf_bottleneck"
-             units="percent"
-             symbol_name="SfBottleneck"
-             semantic_type="duration"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SF Stall"
-             description="The percentage of time in which strip-fans pipeline stage was stalled."
-             data_type="float"
-             max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sf_stall"
-             units="percent"
-             symbol_name="SfStall"
-             semantic_type="duration"
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VF Bottleneck"
+             symbol_name="VfBottleneck"
+             underscore_name="vf_bottleneck"
+             description="The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Input Assembler"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Bottleneck"
-             low_watermark="3"
-             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+    <counter name="VS Bottleneck"
+             symbol_name="VsBottleneck"
+             underscore_name="vs_bottleneck"
+             description="The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="10"
+             high_watermark="30"
              data_type="float"
-             high_watermark="9"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
              max_equation="100"
-             underscore_name="hs_bottleneck"
              units="percent"
-             symbol_name="HsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Vertex Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="CL Stall"
-             description="The percentage of time in which clipper pipeline stage was stalled."
+    <counter name="HS Bottleneck"
+             symbol_name="HsBottleneck"
+             underscore_name="hs_bottleneck"
+             description="The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline."
+             low_watermark="3"
+             high_watermark="9"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="cl_stall"
              units="percent"
-             symbol_name="ClStall"
              semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Clipper"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SO Bottleneck"
+    <counter name="DS Bottleneck"
+             symbol_name="DsBottleneck"
+             underscore_name="ds_bottleneck"
+             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="so_bottleneck"
              units="percent"
-             symbol_name="SoBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="DS Bottleneck"
+    <counter name="GS Bottleneck"
+             symbol_name="GsBottleneck"
+             underscore_name="gs_bottleneck"
+             description="The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline."
              low_watermark="5"
-             description="The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
              high_watermark="15"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="ds_bottleneck"
              units="percent"
-             symbol_name="DsBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Geometry Shader"
              mdapi_usage_flags="Tier3 Indicate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="SO Bottleneck"
+             symbol_name="SoBottleneck"
+             underscore_name="so_bottleneck"
+             description="The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Clipper Bottleneck"
-             low_watermark="10"
+             symbol_name="ClBottleneck"
+             underscore_name="cl_bottleneck"
              description="The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline."
-             data_type="float"
+             low_watermark="10"
              high_watermark="30"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             data_type="float"
              max_equation="100"
-             underscore_name="cl_bottleneck"
              units="percent"
-             symbol_name="ClBottleneck"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Indicate Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier3 Indicate Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="DS Stall"
-             description="The percentage of time in which domain shader pipeline stage was stalled."
+    <counter name="Strip-Fans Bottleneck"
+             symbol_name="SfBottleneck"
+             underscore_name="sf_bottleneck"
+             description="The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="10"
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ds_stall"
              units="percent"
-             symbol_name="DsStall"
              semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Domain Shader"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="Hi-Depth Bottleneck"
+             symbol_name="HiDepthBottleneck"
+             underscore_name="hi_depth_bottleneck"
+             description="The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="BC Bottleneck"
+             symbol_name="BcBottleneck"
+             underscore_name="bc_bottleneck"
+             description="The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline."
+             low_watermark="5"
+             high_watermark="15"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Barycentric Calc"
+             mdapi_usage_flags="Tier3 Indicate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="HS Stall"
+             symbol_name="HsStall"
+             underscore_name="hs_stall"
+             description="The percentage of time in which hull stall pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Hull Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="DS Stall"
+             symbol_name="DsStall"
+             underscore_name="ds_stall"
+             description="The percentage of time in which domain shader pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Domain Shader"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="SO Stall"
+             symbol_name="SoStall"
+             underscore_name="so_stall"
+             description="The percentage of time in which stream-output pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Stream Output"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="CL Stall"
+             symbol_name="ClStall"
+             underscore_name="cl_stall"
+             description="The percentage of time in which clipper pipeline stage was stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Clipper"
+             mdapi_usage_flags="Tier4 Correlate Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="SO Stall"
-             description="The percentage of time in which stream-output pipeline stage was stalled."
+    <counter name="SF Stall"
+             symbol_name="SfStall"
+             underscore_name="sf_stall"
+             description="The percentage of time in which strip-fans pipeline stage was stalled."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="so_stall"
              units="percent"
-             symbol_name="SoStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Rasterizer/Strip-Fans"
              mdapi_usage_flags="Tier4 Correlate Draw"
-             mdapi_group="3D Pipe/Stream Output"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set HDCAndSF"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="hdc_and_sf"
-       hw_config_guid="dedd95cd-1bd4-4e65-be7f-1fd7aa43fe12"
        chipset="TGL"
        symbol_name="HDCAndSF"
+       underscore_name="hdc_and_sf"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="dedd95cd-1bd4-4e65-be7f-1fd7aa43fe12"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
              units="percent"
-             symbol_name="VsSendActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_em_active"
-             units="percent"
-             symbol_name="VsEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
              mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
-             />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Polygon Data Ready"
-             description="The percentage of time in which geometry pipeline output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="poly_data_ready"
-             units="percent"
-             symbol_name="PolyDataReady"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe/Strip-Fans"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice4)"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader04_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader04AccessStalledOnL3"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 16 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_em_active"
              units="percent"
-             symbol_name="PsEmActive"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
              semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 25 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="PS FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 15 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu_active"
-             units="percent"
-             symbol_name="PsFpuActive"
-             semantic_type="duration"
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="texels"
              semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="VS FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu_active"
-             units="percent"
-             symbol_name="VsFpuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1)"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader01_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader01AccessStalledOnL3"
-             availability="$DualSubsliceMask 2 AND"
-             semantic_type="duration"
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
+             units="messages"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2)"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader02_access_stalled_on_l3"
-             units="percent"
-             symbol_name="NonSamplerShader02AccessStalledOnL3"
-             availability="$DualSubsliceMask 4 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader00AccessStalledOnL3"
+             underscore_name="non_sampler_shader00_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0)"
              data_type="float"
              max_equation="100"
-             equation="2 A 17 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader01AccessStalledOnL3"
+             underscore_name="non_sampler_shader01_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1)"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis=""
+    <counter name="Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader02AccessStalledOnL3"
+             underscore_name="non_sampler_shader02_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader03AccessStalledOnL3"
+             underscore_name="non_sampler_shader03_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3)"
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
+    <counter name="Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader04AccessStalledOnL3"
+             underscore_name="non_sampler_shader04_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice4)"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
+             mdapi_group="GPU/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
-             />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3"
+             symbol_name="NonSamplerShader05AccessStalledOnL3"
+             underscore_name="non_sampler_shader05_access_stalled_on_l3"
+             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice5)"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
+             mdapi_group="GPU/Data Port"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="subslice"
              />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
+    <counter name="Polygon Data Ready"
+             symbol_name="PolyDataReady"
+             underscore_name="poly_data_ready"
+             description="The percentage of time in which geometry pipeline output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe/Strip-Fans"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="VS EM Pipe Active"
+             symbol_name="VsEmActive"
+             underscore_name="vs_em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="PS EM Pipe Active"
+             symbol_name="PsEmActive"
+             underscore_name="ps_em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 16 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0)"
+    <counter name="PS FPU Pipe Active"
+             symbol_name="PsFpuActive"
+             underscore_name="ps_fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader00_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader00AccessStalledOnL3"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
+             equation="2 A 15 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3)"
+    <counter name="VS FPU Pipe Active"
+             symbol_name="VsFpuActive"
+             underscore_name="vs_fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader03_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader03AccessStalledOnL3"
-             availability="$DualSubsliceMask 8 AND"
              semantic_type="duration"
+             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3"
-             description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice5)"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_sampler_shader05_access_stalled_on_l3"
              units="percent"
-             symbol_name="NonSamplerShader05AccessStalledOnL3"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
+             equation="2 A 17 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="GPU/Data Port"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Metric set RasterizerAndPixelBackend"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="rasterizer_and_pixel_backend"
-       hw_config_guid="a889ccb3-5ebd-437f-b5c6-e951fba822f5"
        chipset="TGL"
        symbol_name="RasterizerAndPixelBackend"
+       underscore_name="rasterizer_and_pixel_backend"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="a889ccb3-5ebd-437f-b5c6-e951fba822f5"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Send Pipe Active"
-             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_send_active"
-             units="percent"
-             symbol_name="VsSendActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Pipe0 PS Output Available"
-             description="The percentage of time in which slice0 pipe0 PS output is available"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output00_available"
-             units="percent"
-             symbol_name="PSOutput00Available"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="VS EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_em_active"
              units="percent"
-             symbol_name="VsEmActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
-    <counter name="Slice0 Pipe1 PS Output Available"
-             description="The percentage of time in which slice0 pipe1 PS output is available"
-             data_type="float"
-             max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output01_available"
-             units="percent"
-             symbol_name="PSOutput01Available"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Early Hi-Depth Test Fails"
-             description="The total number of pixels dropped on early hierarchical depth test."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 22 READ 4 UMUL"
-             underscore_name="hi_depth_test_fails"
-             units="pixels"
-             symbol_name="HiDepthTestFails"
+             units="threads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
              />
-    <counter name="FS Both FPU Active"
-             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
-             data_type="float"
-             max_equation="100"
-             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_eu_both_fpu_active"
-             units="percent"
-             symbol_name="PsEuBothFpuActive"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Frame Batch Draw"
-             mdapi_group="3D Pipe/Fragment Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Slice0 Pipe1 Pixel Values Ready"
-             description="The percentage of time in which slice0 pipe1 pixel values are ready"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values01_ready"
              units="percent"
-             symbol_name="PixelValues01Ready"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS EM Pipe Active"
-             description="The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 16 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_em_active"
              units="percent"
-             symbol_name="PsEmActive"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Rasterized Pixels"
+             symbol_name="RasterizedPixels"
+             underscore_name="rasterized_pixels"
+             description="The total number of rasterized pixels."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 21 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Early Hi-Depth Test Fails"
+             symbol_name="HiDepthTestFails"
+             underscore_name="hi_depth_test_fails"
+             description="The total number of pixels dropped on early hierarchical depth test."
+             data_type="uint64"
+             units="pixels"
+             semantic_type="event"
+             equation="A 22 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Hi-Depth Test"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Barrier Messages"
-             description="The total number of shader barrier messages."
+    <counter name="Early Depth Test Fails"
+             symbol_name="EarlyDepthTestFails"
+             underscore_name="early_depth_test_fails"
+             description="The total number of pixels dropped on early depth test."
              data_type="uint64"
-             equation="A 35 READ"
-             underscore_name="shader_barriers"
-             units="messages"
-             symbol_name="ShaderBarriers"
+             units="pixels"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 23 READ 4 UMUL"
+             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Barrier"
              />
-    <counter name="Sampler Texels"
-             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+    <counter name="Samples Killed in FS"
+             symbol_name="SamplesKilledInPs"
+             underscore_name="samples_killed_in_ps"
+             description="The total number of samples or pixels dropped in fragment shaders."
              data_type="uint64"
-             equation="A 28 READ 4 UMUL"
-             underscore_name="sampler_texels"
-             units="texels"
-             symbol_name="SamplerTexels"
+             units="pixels"
              semantic_type="event"
+             equation="A 24 READ 4 UMUL"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Input"
              />
     <counter name="Pixels Failing Tests"
+             symbol_name="PixelsFailingPostPsTests"
+             underscore_name="pixels_failing_post_ps_tests"
              description="The total number of pixels dropped on post-FS alpha, stencil, or depth tests."
              data_type="uint64"
-             equation="A 25 READ 4 UMUL"
-             underscore_name="pixels_failing_post_ps_tests"
              units="pixels"
-             symbol_name="PixelsFailingPostPsTests"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="A 25 READ 4 UMUL"
              mdapi_group="3D Pipe/Output Merger"
-             />
-    <counter name="PS FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 15 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_fpu_active"
-             units="percent"
-             symbol_name="PsFpuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="Samples Written"
+             symbol_name="SamplesWritten"
+             underscore_name="samples_written"
+             description="The total number of samples or pixels written to all render targets."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="pixels"
+             semantic_type="event"
+             equation="A 26 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="Samples Blended"
+             symbol_name="SamplesBlended"
+             underscore_name="samples_blended"
+             description="The total number of blended samples or pixels written to all render targets."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="pixels"
              semantic_type="event"
+             equation="A 27 READ 4 UMUL"
+             mdapi_group="3D Pipe/Output Merger"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="VS FPU Pipe Active"
-             description="The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vs_fpu_active"
-             units="percent"
-             symbol_name="VsFpuActive"
-             semantic_type="duration"
+    <counter name="Sampler Texels"
+             symbol_name="SamplerTexels"
+             underscore_name="sampler_texels"
+             description="The total number of texels seen on input (with 2x2 accuracy) in all sampler units."
+             data_type="uint64"
+             units="texels"
+             semantic_type="event"
+             equation="A 28 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Input"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Vertex Shader"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Sampler Texels Misses"
+             symbol_name="SamplerTexelMisses"
+             underscore_name="sampler_texel_misses"
              description="The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache."
              data_type="uint64"
-             equation="A 29 READ 4 UMUL"
-             underscore_name="sampler_texel_misses"
              units="texels"
-             symbol_name="SamplerTexelMisses"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 29 READ 4 UMUL"
+             mdapi_group="Sampler/Sampler Cache"
              mdapi_usage_flags="Tier3 Batch Frame Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="Sampler/Sampler Cache"
              />
-    <counter name="Slice0 Pipe0 Post-EarlyZ Pixel Data Ready"
-             description="The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_data00_ready"
-             units="percent"
-             symbol_name="PixelData00Ready"
-             semantic_type="duration"
+    <counter name="SLM Bytes Read"
+             symbol_name="SlmBytesRead"
+             underscore_name="slm_bytes_read"
+             description="The total number of GPU memory bytes read from shared local memory."
+             data_type="uint64"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 30 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer/Early Depth Test"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="SLM Bytes Written"
+             symbol_name="SlmBytesWritten"
+             underscore_name="slm_bytes_written"
+             description="The total number of GPU memory bytes written into shared local memory."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
+             units="bytes"
+             semantic_type="throughput"
+             equation="A 31 READ 64 UMUL"
+             mdapi_group="L3/Data Port/SLM"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Memory Accesses"
+             symbol_name="ShaderMemoryAccesses"
+             underscore_name="shader_memory_accesses"
+             description="The total number of shader memory accesses to L3."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 32 READ"
+             mdapi_group="L3/Data Port"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Slice0 Rasterizer Input Available"
-             description="The percentage of time in which slice0 rasterizer input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_input_available"
-             units="percent"
-             symbol_name="Rasterizer0InputAvailable"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="Shader Atomic Memory Accesses"
+             symbol_name="ShaderAtomics"
+             underscore_name="shader_atomics"
+             description="The total number of shader atomic memory accesses."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 34 READ"
+             mdapi_group="L3/Data Port/Atomics"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="SLM Bytes Read"
-             description="The total number of GPU memory bytes read from shared local memory."
+    <counter name="L3 Shader Throughput"
+             symbol_name="L3ShaderThroughput"
+             underscore_name="l3_shader_throughput"
+             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
              data_type="uint64"
              max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ 64 UMUL"
-             underscore_name="slm_bytes_read"
              units="bytes"
-             symbol_name="SlmBytesRead"
              semantic_type="throughput"
+             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
+             mdapi_group="L3/Data Port"
+             mdapi_usage_flags="Tier2 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="Shader Barrier Messages"
+             symbol_name="ShaderBarriers"
+             underscore_name="shader_barriers"
+             description="The total number of shader barrier messages."
+             data_type="uint64"
+             units="messages"
+             semantic_type="event"
+             equation="A 35 READ"
+             mdapi_group="EU Array/Barrier"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Send Pipeline Active"
-             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+    <counter name="Slice0 Rasterizer Input Available"
+             symbol_name="Rasterizer0InputAvailable"
+             underscore_name="rasterizer0_input_available"
+             description="The percentage of time in which slice0 rasterizer input is available"
              data_type="float"
              max_equation="100"
-             equation="2 A 17 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_send_active"
              units="percent"
-             symbol_name="PsSendActive"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pixel Shader"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 Rasterizer Output Ready"
+             symbol_name="Rasterizer0OutputReady"
+             underscore_name="rasterizer0_output_ready"
+             description="The percentage of time in which slice0 rasterizer output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GPU/Rasterizer"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Slice0 Pipe0 Post-EarlyZ Pixel Data Ready"
+             symbol_name="PixelData00Ready"
+             underscore_name="pixel_data00_ready"
+             description="The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied)"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/Rasterizer/Early Depth Test"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="SQ01 is full"
-             description="The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries)"
+    <counter name="Slice0 Pipe0 PS Output Available"
+             symbol_name="PSOutput00Available"
+             underscore_name="ps_output00_available"
+             description="The percentage of time in which slice0 pipe0 PS output is available"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue01_full"
              units="percent"
-             symbol_name="GTRequestQueue01Full"
              semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Rasterized Pixels"
-             description="The total number of rasterized pixels."
-             data_type="uint64"
-             equation="A 21 READ 4 UMUL"
-             underscore_name="rasterized_pixels"
-             units="pixels"
-             symbol_name="RasterizedPixels"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 Pipe2 Pixel Values Ready"
-             description="The percentage of time in which slice0 pipe2 pixel values are ready"
+    <counter name="Slice0 Pipe1 PS Output Available"
+             symbol_name="PSOutput01Available"
+             underscore_name="ps_output01_available"
+             description="The percentage of time in which slice0 pipe1 PS output is available"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values02_ready"
              units="percent"
-             symbol_name="PixelValues02Ready"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="Slice0 Pipe2 PS Output Available"
+             symbol_name="PSOutput02Available"
+             underscore_name="ps_output02_available"
+             description="The percentage of time in which slice0 pipe2 PS output is available"
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Slice0 Pipe0 Pixel Values Ready"
+             symbol_name="PixelValues00Ready"
+             underscore_name="pixel_values00_ready"
              description="The percentage of time in which slice0 pipe0 pixel values are ready"
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="pixel_values00_ready"
              units="percent"
-             symbol_name="PixelValues00Ready"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="SQ10 is full"
-             description="The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries)"
+    <counter name="Slice0 Pipe1 Pixel Values Ready"
+             symbol_name="PixelValues01Ready"
+             underscore_name="pixel_values01_ready"
+             description="The percentage of time in which slice0 pipe1 pixel values are ready"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue10_full"
              units="percent"
-             symbol_name="GTRequestQueue10Full"
              semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Samples Written"
-             description="The total number of samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 26 READ 4 UMUL"
-             underscore_name="samples_written"
-             units="pixels"
-             symbol_name="SamplesWritten"
-             semantic_type="event"
-             mdapi_supported_apis=""
+    <counter name="Slice0 Pipe2 Pixel Values Ready"
+             symbol_name="PixelValues02Ready"
+             underscore_name="pixel_values02_ready"
+             description="The percentage of time in which slice0 pipe2 pixel values are ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU/3D Pipe"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
              />
     <counter name="SQ00 is full"
+             symbol_name="GTRequestQueue00Full"
+             underscore_name="gt_request_queue00_full"
              description="The percentage of time when IDI0 SQ0 is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue00_full"
              units="percent"
-             symbol_name="GTRequestQueue00Full"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="SQ01 is full"
+             symbol_name="GTRequestQueue01Full"
+             underscore_name="gt_request_queue01_full"
+             description="The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Pipe2 PS Output Available"
-             description="The percentage of time in which slice0 pipe2 PS output is available"
+    <counter name="SQ10 is full"
+             symbol_name="GTRequestQueue10Full"
+             underscore_name="gt_request_queue10_full"
+             description="The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_output02_available"
              units="percent"
-             symbol_name="PSOutput02Available"
              semantic_type="duration"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/3D Pipe"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 Rasterizer Output Ready"
-             description="The percentage of time in which slice0 rasterizer output is ready"
+    <counter name="SQ11 is full"
+             symbol_name="GTRequestQueue11Full"
+             underscore_name="gt_request_queue11_full"
+             description="The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries)"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="rasterizer0_output_ready"
              units="percent"
-             symbol_name="Rasterizer0OutputReady"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GTI"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Rasterizer"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Samples Blended"
-             description="The total number of blended samples or pixels written to all render targets."
-             data_type="uint64"
-             equation="A 27 READ 4 UMUL"
-             underscore_name="samples_blended"
-             units="pixels"
-             symbol_name="SamplesBlended"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Output Merger"
              />
-    <counter name="Early Depth Test Fails"
-             description="The total number of pixels dropped on early depth test."
-             data_type="uint64"
-             equation="A 23 READ 4 UMUL"
-             underscore_name="early_depth_test_fails"
-             units="pixels"
-             symbol_name="EarlyDepthTestFails"
-             semantic_type="event"
+    <counter name="VS Send Pipe Active"
+             symbol_name="VsSendActive"
+             underscore_name="vs_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 12 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Rasterizer/Early Depth Test"
              />
-    <counter name="SQ11 is full"
-             description="The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries)"
+    <counter name="VS EM Pipe Active"
+             symbol_name="VsEmActive"
+             underscore_name="vs_em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gt_request_queue11_full"
              units="percent"
-             symbol_name="GTRequestQueue11Full"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 11 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="GTI"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Memory Accesses"
-             description="The total number of shader memory accesses to L3."
-             data_type="uint64"
-             equation="A 32 READ"
-             underscore_name="shader_memory_accesses"
-             units="messages"
-             symbol_name="ShaderMemoryAccesses"
-             semantic_type="event"
+    <counter name="FS Both FPU Active"
+             symbol_name="PsEuBothFpuActive"
+             underscore_name="ps_eu_both_fpu_active"
+             description="The percentage of time in which fragment shaders were processed actively on the both FPUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="3D Pipe/Fragment Shader"
+             mdapi_usage_flags="Tier4 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="PS EM Pipe Active"
+             symbol_name="PsEmActive"
+             underscore_name="ps_em_active"
+             description="The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 16 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="L3 Shader Throughput"
-             description="The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL"
-             underscore_name="l3_shader_throughput"
-             units="bytes"
-             symbol_name="L3ShaderThroughput"
-             semantic_type="throughput"
+    <counter name="PS FPU Pipe Active"
+             symbol_name="PsFpuActive"
+             underscore_name="ps_fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 15 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Frame Batch Draw"
-             mdapi_group="L3/Data Port"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Samples Killed in FS"
-             description="The total number of samples or pixels dropped in fragment shaders."
-             data_type="uint64"
-             equation="A 24 READ 4 UMUL"
-             underscore_name="samples_killed_in_ps"
-             units="pixels"
-             symbol_name="SamplesKilledInPs"
-             semantic_type="event"
+    <counter name="VS FPU Pipe Active"
+             symbol_name="VsFpuActive"
+             underscore_name="vs_fpu_active"
+             description="The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 10 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier4 Overview Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="3D Pipe/Fragment Shader"
              />
-    <counter name="SLM Bytes Written"
-             description="The total number of GPU memory bytes written into shared local memory."
-             data_type="uint64"
-             max_equation="$GpuCoreClocks 64 UMUL $EuSubslicesTotalCount UMUL"
-             equation="A 31 READ 64 UMUL"
-             underscore_name="slm_bytes_written"
-             units="bytes"
-             symbol_name="SlmBytesWritten"
-             semantic_type="throughput"
+    <counter name="PS Send Pipeline Active"
+             symbol_name="PsSendActive"
+             underscore_name="ps_send_active"
+             description="The percentage of time in which EU send pipeline was actively processing a pixel shader instruction."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 17 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pixel Shader"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_group="L3/Data Port/SLM"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Shader Atomic Memory Accesses"
-             description="The total number of shader atomic memory accesses."
-             data_type="uint64"
-             equation="A 34 READ"
-             underscore_name="shader_atomics"
-             units="messages"
-             symbol_name="ShaderAtomics"
-             semantic_type="event"
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="L3/Data Port/Atomics"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="Gen12LP L3_1"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_1"
-       hw_config_guid="feee2629-03a8-4d31-ab4d-7d16572163fb"
        chipset="TGL"
        symbol_name="L3_1"
+       underscore_name="l3_1"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="feee2629-03a8-4d31-ab4d-7d16572163fb"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank5 Input Available"
-             description="The percentage of time in which slice0 L3 bank5 has input available"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ B 6 READ FADD B 5 READ FADD B 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank5_input_available"
              units="percent"
-             symbol_name="L30Bank5InputAvailable"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank0 Input Available"
-             description="The percentage of time in which slice0 L3 bank0 has input available"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ FADD C 5 READ FADD C 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_input_available"
              units="percent"
-             symbol_name="L30Bank0InputAvailable"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Slice0 L3 Bank0 Input Available"
+             symbol_name="L30Bank0InputAvailable"
+             underscore_name="l30_bank0_input_available"
+             description="The percentage of time in which slice0 L3 bank0 has input available"
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="C 7 READ C 6 READ FADD C 5 READ FADD C 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
     <counter name="Slice0 L3 Bank1 Input Available"
+             symbol_name="L30Bank1InputAvailable"
+             underscore_name="l30_bank1_input_available"
              description="The percentage of time in which slice0 L3 bank1 has input available"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ C 2 READ FADD C 1 READ FADD C 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_input_available"
              units="percent"
-             symbol_name="L30Bank1InputAvailable"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 3 READ C 2 READ FADD C 1 READ FADD C 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 L3 Bank4 Input Available"
+             symbol_name="L30Bank4InputAvailable"
+             underscore_name="l30_bank4_input_available"
+             description="The percentage of time in which slice0 L3 bank4 has input available"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 3 READ B 2 READ FADD B 1 READ FADD B 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="Slice0 L3 Bank4 Input Available"
-             description="The percentage of time in which slice0 L3 bank4 has input available"
+    <counter name="Slice0 L3 Bank5 Input Available"
+             symbol_name="L30Bank5InputAvailable"
+             underscore_name="l30_bank5_input_available"
+             description="The percentage of time in which slice0 L3 bank5 has input available"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ B 2 READ FADD B 1 READ FADD B 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank4_input_available"
              units="percent"
-             symbol_name="L30Bank4InputAvailable"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="B 7 READ B 6 READ FADD B 5 READ FADD B 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="OA" address="0x0000D900" value="0x00000000" />
         <register type="OA" address="0x0000D904" value="0x00800000" />
         <register type="OA" address="0x0000D910" value="0x00000000" />
-        <register type="OA" address="0x0000D914" value="0x00800000" />
-        <register type="OA" address="0x0000DC40" value="0x00000000" />
-    </register_config>
-    <register_config type="FLEX">
-        <register type="FLEX" address="0x0000E458" value="0x00005004" />
-        <register type="FLEX" address="0x0000E558" value="0x00008003" />
-    </register_config>
-  </set>
-
-  <set name="Gen12LP L3_2"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_2"
-       hw_config_guid="5266f235-1711-4eef-9493-ebdf0238d512"
-       chipset="TGL"
-       symbol_name="L3_2"
-       >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 L3 Bank3 Input Available"
-             description="The percentage of time in which slice0 L3 bank3 has input available"
-             data_type="float"
-             max_equation="100"
-             equation="B 7 READ B 6 READ FADD B 5 READ FADD B 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_input_available"
-             units="percent"
-             symbol_name="L30Bank3InputAvailable"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Slice0 L3 Bank6 Input Available"
-             description="The percentage of time in which slice0 L3 bank6 has input available"
-             data_type="float"
-             max_equation="100"
-             equation="C 3 READ C 2 READ FADD C 1 READ FADD C 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank6_input_available"
-             units="percent"
-             symbol_name="L30Bank6InputAvailable"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Slice0 L3 Bank2 Input Available"
-             description="The percentage of time in which slice0 L3 bank2 has input available"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ B 2 READ FADD B 1 READ FADD B 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_input_available"
-             units="percent"
-             symbol_name="L30Bank2InputAvailable"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
-             units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
+        <register type="OA" address="0x0000D914" value="0x00800000" />
+        <register type="OA" address="0x0000DC40" value="0x00000000" />
+    </register_config>
+    <register_config type="FLEX">
+        <register type="FLEX" address="0x0000E458" value="0x00005004" />
+        <register type="FLEX" address="0x0000E558" value="0x00008003" />
+    </register_config>
+  </set>
+
+  <set name="Gen12LP L3_2"
+       chipset="TGL"
+       symbol_name="L3_2"
+       underscore_name="l3_2"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="5266f235-1711-4eef-9493-ebdf0238d512"
+       >
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
+             units="cycles"
              semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
              description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank7 Input Available"
-             description="The percentage of time in which slice0 L3 bank7 has input available"
+    <counter name="Slice0 L3 Bank2 Input Available"
+             symbol_name="L30Bank2InputAvailable"
+             underscore_name="l30_bank2_input_available"
+             description="The percentage of time in which slice0 L3 bank2 has input available"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ FADD C 5 READ FADD C 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank7_input_available"
              units="percent"
-             symbol_name="L30Bank7InputAvailable"
+             semantic_type="duration"
+             equation="B 3 READ B 2 READ FADD B 1 READ FADD B 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank3 Input Available"
+             symbol_name="L30Bank3InputAvailable"
+             underscore_name="l30_bank3_input_available"
+             description="The percentage of time in which slice0 L3 bank3 has input available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 7 READ B 6 READ FADD B 5 READ FADD B 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank6 Input Available"
+             symbol_name="L30Bank6InputAvailable"
+             underscore_name="l30_bank6_input_available"
+             description="The percentage of time in which slice0 L3 bank6 has input available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ C 2 READ FADD C 1 READ FADD C 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank7 Input Available"
+             symbol_name="L30Bank7InputAvailable"
+             underscore_name="l30_bank7_input_available"
+             description="The percentage of time in which slice0 L3 bank7 has input available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ FADD C 5 READ FADD C 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gen12LP L3_3"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_3"
-       hw_config_guid="9c2cd379-bf93-4ded-b481-f64efd534c4a"
        chipset="TGL"
        symbol_name="L3_3"
+       underscore_name="l3_3"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="9c2cd379-bf93-4ded-b481-f64efd534c4a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank4 Output Ready"
-             description="The percentage of time in which slice0 L3 bank4 output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank4_output_ready"
-             units="percent"
-             symbol_name="L30Bank4OutputReady"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
-             />
-    <counter name="Slice0 L3 Bank0 Output Ready"
-             description="The percentage of time in which slice0 L3 bank0 output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank0_output_ready"
-             units="percent"
-             symbol_name="L30Bank0OutputReady"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
              description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Slice0 L3 Bank0 Output Ready"
+             symbol_name="L30Bank0OutputReady"
+             underscore_name="l30_bank0_output_ready"
+             description="The percentage of time in which slice0 L3 bank0 output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank4 Output Ready"
+             symbol_name="L30Bank4OutputReady"
+             underscore_name="l30_bank4_output_ready"
+             description="The percentage of time in which slice0 L3 bank4 output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gen12LP L3_4"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_4"
-       hw_config_guid="6a68185d-0056-4891-a5f6-29aa1e1d81ae"
        chipset="TGL"
        symbol_name="L3_4"
+       underscore_name="l3_4"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="6a68185d-0056-4891-a5f6-29aa1e1d81ae"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank1 Output Ready"
-             description="The percentage of time in which slice0 L3 bank1 output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank1_output_ready"
-             units="percent"
-             symbol_name="L30Bank1OutputReady"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
              description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
+    <counter name="Slice0 L3 Bank1 Output Ready"
+             symbol_name="L30Bank1OutputReady"
+             underscore_name="l30_bank1_output_ready"
+             description="The percentage of time in which slice0 L3 bank1 output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
     <counter name="Slice0 L3 Bank5 Output Ready"
+             symbol_name="L30Bank5OutputReady"
+             underscore_name="l30_bank5_output_ready"
              description="The percentage of time in which slice0 L3 bank5 output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank5_output_ready"
              units="percent"
-             symbol_name="L30Bank5OutputReady"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gen12LP L3_5"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_5"
-       hw_config_guid="3f17a326-ae8b-4869-9f5a-3bccf793e287"
        chipset="TGL"
        symbol_name="L3_5"
+       underscore_name="l3_5"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="3f17a326-ae8b-4869-9f5a-3bccf793e287"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank6 Output Ready"
-             description="The percentage of time in which slice0 L3 bank6 output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank6_output_ready"
-             units="percent"
-             symbol_name="L30Bank6OutputReady"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
              description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
              description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Slice0 L3 Bank2 Output Ready"
+             symbol_name="L30Bank2OutputReady"
+             underscore_name="l30_bank2_output_ready"
              description="The percentage of time in which slice0 L3 bank2 output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank2_output_ready"
              units="percent"
-             symbol_name="L30Bank2OutputReady"
-             availability="$SliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="slice"
+             />
+    <counter name="Slice0 L3 Bank6 Output Ready"
+             symbol_name="L30Bank6OutputReady"
+             underscore_name="l30_bank6_output_ready"
+             description="The percentage of time in which slice0 L3 bank6 output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Gen12LP L3_6"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="l3_6"
-       hw_config_guid="d312c40a-9fb7-489a-9a1a-9cd80aac6d61"
        chipset="TGL"
        symbol_name="L3_6"
+       underscore_name="l3_6"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="d312c40a-9fb7-489a-9a1a-9cd80aac6d61"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 L3 Bank7 Output Ready"
-             description="The percentage of time in which slice0 L3 bank7 output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank7_output_ready"
-             units="percent"
-             symbol_name="L30Bank7OutputReady"
-             availability="$SliceMask 1 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GTI/L3"
-             mdapi_hw_unit_type="slice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
              description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Slice0 L3 Bank3 Output Ready"
+             symbol_name="L30Bank3OutputReady"
+             underscore_name="l30_bank3_output_ready"
              description="The percentage of time in which slice0 L3 bank3 output is ready"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="l30_bank3_output_ready"
              units="percent"
-             symbol_name="L30Bank3OutputReady"
+             semantic_type="duration"
+             equation="C 5 READ C 4 READ FADD 100 UMUL $GpuCoreClocks FDIV"
              availability="$SliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
              mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="slice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Slice0 L3 Bank7 Output Ready"
+             symbol_name="L30Bank7OutputReady"
+             underscore_name="l30_bank7_output_ready"
+             description="The percentage of time in which slice0 L3 bank7 output is ready"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="C 7 READ C 6 READ FADD 100 UMUL $GpuCoreClocks FDIV"
+             availability="$SliceMask 1 AND"
+             mdapi_group="GTI/L3"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="slice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Sampler_1"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="sampler_1"
-       hw_config_guid="f0f255a4-535c-43ed-9d6b-85958cef6c1c"
        chipset="TGL"
        symbol_name="Sampler_1"
+       underscore_name="sampler_1"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="f0f255a4-535c-43ed-9d6b-85958cef6c1c"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 DualSubslice1 Input Available"
-             description="The percentage of time in which slice0 dualsubslice1 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_input_available"
-             units="percent"
-             symbol_name="Sampler01InputAvailable"
-             availability="$DualSubsliceMask 2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="dualsubslice"
-             />
-    <counter name="Slice0 DualSubslice2 Input Available"
-             description="The percentage of time in which slice0 dualsubslice2 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_input_available"
-             units="percent"
-             symbol_name="Sampler02InputAvailable"
-             availability="$DualSubsliceMask 4 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 DualSubslice3 Input Available"
-             description="The percentage of time in which slice0 dualsubslice3 sampler input is available"
-             data_type="float"
-             max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler03_input_available"
-             units="percent"
-             symbol_name="Sampler03InputAvailable"
-             availability="$DualSubsliceMask 8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="dualsubslice"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
     <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
              description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
-             units="threads"
-             symbol_name="VsThreads"
-             semantic_type="event"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
     <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
              description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 DualSubslice4 Input Available"
-             description="The percentage of time in which slice0 dualsubslice4 sampler input is available"
+    <counter name="Slice0 DualSubslice0 Input Available"
+             symbol_name="Sampler00InputAvailable"
+             underscore_name="sampler00_input_available"
+             description="The percentage of time in which slice0 dualsubslice0 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler04_input_available"
              units="percent"
-             symbol_name="Sampler04InputAvailable"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="Slice0 DualSubslice1 Input Available"
+             symbol_name="Sampler01InputAvailable"
+             underscore_name="sampler01_input_available"
+             description="The percentage of time in which slice0 dualsubslice1 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Slice0 DualSubslice5 Input Available"
-             description="The percentage of time in which slice0 dualsubslice5 sampler input is available"
+    <counter name="Slice0 DualSubslice2 Input Available"
+             symbol_name="Sampler02InputAvailable"
+             underscore_name="sampler02_input_available"
+             description="The percentage of time in which slice0 dualsubslice2 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler05_input_available"
              units="percent"
-             symbol_name="Sampler05InputAvailable"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Slice0 DualSubslice0 Input Available"
-             description="The percentage of time in which slice0 dualsubslice0 sampler input is available"
+    <counter name="Slice0 DualSubslice3 Input Available"
+             symbol_name="Sampler03InputAvailable"
+             underscore_name="sampler03_input_available"
+             description="The percentage of time in which slice0 dualsubslice3 sampler input is available"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_input_available"
              units="percent"
-             symbol_name="Sampler00InputAvailable"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="dualsubslice"
+             />
+    <counter name="Slice0 DualSubslice4 Input Available"
+             symbol_name="Sampler04InputAvailable"
+             underscore_name="sampler04_input_available"
+             description="The percentage of time in which slice0 dualsubslice4 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="dualsubslice"
+             />
+    <counter name="Slice0 DualSubslice5 Input Available"
+             symbol_name="Sampler05InputAvailable"
+             underscore_name="sampler05_input_available"
+             description="The percentage of time in which slice0 dualsubslice5 sampler input is available"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Sampler_2"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="sampler_2"
-       hw_config_guid="f47c6b97-fc10-4962-bb67-d623e9d6219b"
        chipset="TGL"
        symbol_name="Sampler_2"
+       underscore_name="sampler_2"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="f47c6b97-fc10-4962-bb67-d623e9d6219b"
        >
-    <counter name="GPU Core Clocks"
-             description="The total number of GPU core clocks elapsed during the measurement."
-             data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="Slice0 DualSubslice0 Sampler Output Ready"
-             description="The percentage of time in which slice0 dualsubslice0 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler00_output_ready"
-             units="percent"
-             symbol_name="Sampler00OutputReady"
-             availability="$DualSubsliceMask 1 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="Slice0 DualSubslice1 Sampler Output Ready"
-             description="The percentage of time in which slice0 dualsubslice1 sampler output is ready"
-             data_type="float"
-             max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler01_output_ready"
-             units="percent"
-             symbol_name="Sampler01OutputReady"
-             availability="$DualSubsliceMask 2 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
-             />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
-             units="percent"
-             symbol_name="EuFpuEmActive"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
-             units="threads"
-             symbol_name="GsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
+             description="The total number of GPU core clocks elapsed during the measurement."
+             data_type="uint64"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
-             units="percent"
-             symbol_name="EuStall"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 DualSubslice2 Sampler Output Ready"
-             description="The percentage of time in which slice0 dualsubslice2 sampler output is ready"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler02_output_ready"
              units="percent"
-             symbol_name="Sampler02OutputReady"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Sampler"
-             mdapi_hw_unit_type="subslice"
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
              description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
              units="threads"
-             symbol_name="HsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
              units="threads"
-             symbol_name="CsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
              units="threads"
-             symbol_name="DsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
              mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Slice0 DualSubslice3 Sampler Output Ready"
-             description="The percentage of time in which slice0 dualsubslice3 sampler output is ready"
+    <counter name="Slice0 DualSubslice0 Sampler Output Ready"
+             symbol_name="Sampler00OutputReady"
+             underscore_name="sampler00_output_ready"
+             description="The percentage of time in which slice0 dualsubslice0 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler03_output_ready"
              units="percent"
-             symbol_name="Sampler03OutputReady"
-             availability="$DualSubsliceMask 8 AND"
              semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 DualSubslice1 Sampler Output Ready"
+             symbol_name="Sampler01OutputReady"
+             underscore_name="sampler01_output_ready"
+             description="The percentage of time in which slice0 dualsubslice1 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
+             mdapi_group="GPU/Sampler"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="subslice"
+             />
+    <counter name="Slice0 DualSubslice2 Sampler Output Ready"
+             symbol_name="Sampler02OutputReady"
+             underscore_name="sampler02_output_ready"
+             description="The percentage of time in which slice0 dualsubslice2 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
-             units="threads"
-             symbol_name="PsThreads"
-             semantic_type="event"
+    <counter name="Slice0 DualSubslice3 Sampler Output Ready"
+             symbol_name="Sampler03OutputReady"
+             underscore_name="sampler03_output_ready"
+             description="The percentage of time in which slice0 dualsubslice3 sampler output is ready"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
+             mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 DualSubslice4 Sampler Output Ready"
+             symbol_name="Sampler04OutputReady"
+             underscore_name="sampler04_output_ready"
              description="The percentage of time in which slice0 dualsubslice4 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler04_output_ready"
              units="percent"
-             symbol_name="Sampler04OutputReady"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
     <counter name="Slice0 DualSubslice5 Sampler Output Ready"
+             symbol_name="Sampler05OutputReady"
+             underscore_name="sampler05_output_ready"
              description="The percentage of time in which slice0 dualsubslice5 sampler output is ready"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="sampler05_output_ready"
              units="percent"
-             symbol_name="Sampler05OutputReady"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Sampler"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="subslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="TDL_1"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="tdl_1"
-       hw_config_guid="2e49d25e-93e8-4e2b-b91b-51731f5fb315"
        chipset="TGL"
        symbol_name="TDL_1"
+       underscore_name="tdl_1"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="2e49d25e-93e8-4e2b-b91b-51731f5fb315"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready"
              units="percent"
-             symbol_name="ThreadHeader01Ready"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread02_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread02ReadyForDispatch"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port2"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort2"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port3"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort3"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher"
+             symbol_name="NonPSThread00ReadyForDispatch"
+             underscore_name="non_ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort0"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher"
+             symbol_name="NonPSThread01ReadyForDispatch"
+             underscore_name="non_ps_thread01_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header01_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader01ReadyPort1"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher"
+             symbol_name="NonPSThread02ReadyForDispatch"
+             underscore_name="non_ps_thread02_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread00ReadyForDispatch"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher"
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher"
+             symbol_name="NonPSThread03ReadyForDispatch"
+             underscore_name="non_ps_thread03_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready"
              units="percent"
-             symbol_name="ThreadHeader00Ready"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher"
+             symbol_name="NonPSThread04ReadyForDispatch"
+             underscore_name="non_ps_thread04_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher"
+             symbol_name="NonPSThread05ReadyForDispatch"
+             underscore_name="non_ps_thread05_ready_for_dispatch"
+             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader00ReadyPort0"
+             underscore_name="thread_header00_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader00ReadyPort1"
+             underscore_name="thread_header00_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2"
+             symbol_name="ThreadHeader00ReadyPort2"
+             underscore_name="thread_header00_ready_port2"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread03_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread03ReadyForDispatch"
-             availability="$DualSubsliceMask 8 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3"
+             symbol_name="ThreadHeader00ReadyPort3"
+             underscore_name="thread_header00_ready_port3"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader01ReadyPort0"
+             underscore_name="thread_header01_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread05_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread05ReadyForDispatch"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader01ReadyPort1"
+             underscore_name="thread_header01_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread04_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread04ReadyForDispatch"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher"
-             description="The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2"
+             symbol_name="ThreadHeader01ReadyPort2"
+             underscore_name="thread_header01_ready_port2"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2"
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="non_ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="NonPSThread01ReadyForDispatch"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3"
+    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3"
+             symbol_name="ThreadHeader01ReadyPort3"
+             underscore_name="thread_header01_ready_port3"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3"
              data_type="float"
              max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port3"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort3"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2"
+    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher"
+             symbol_name="ThreadHeader00Ready"
+             underscore_name="thread_header00_ready"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port2"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort2"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1"
+    <counter name="Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher"
+             symbol_name="ThreadHeader01Ready"
+             underscore_name="thread_header01_ready"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort1"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0"
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header00_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader00ReadyPort0"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
   </set>
 
   <set name="TDL_2"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="tdl_2"
-       hw_config_guid="2a42ff25-99b1-4048-a121-f0664ed42c90"
        chipset="TGL"
        symbol_name="TDL_2"
+       underscore_name="tdl_2"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="2a42ff25-99b1-4048-a121-f0664ed42c90"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 1"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort1"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread00_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread00ReadyForDispatch"
-             availability="$DualSubsliceMask 1 AND"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread03_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread03ReadyForDispatch"
-             availability="$DualSubsliceMask 8 AND"
-             semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
-             />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher"
+             symbol_name="PSThread00ReadyForDispatch"
+             underscore_name="ps_thread00_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 1 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
     <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher"
+             symbol_name="PSThread01ReadyForDispatch"
+             underscore_name="ps_thread01_ready_for_dispatch"
              description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread01_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread01ReadyForDispatch"
-             availability="$DualSubsliceMask 2 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 2 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
     <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice2 Thread Dispatcher"
+             symbol_name="PSThread02ReadyForDispatch"
+             underscore_name="ps_thread02_ready_for_dispatch"
              description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread02_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread02ReadyForDispatch"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher"
+             symbol_name="PSThread03ReadyForDispatch"
+             underscore_name="ps_thread03_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 3"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher"
+             symbol_name="PSThread04ReadyForDispatch"
+             underscore_name="ps_thread04_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port3"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort3"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 0"
+    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher"
+             symbol_name="PSThread05ReadyForDispatch"
+             underscore_name="ps_thread05_ready_for_dispatch"
+             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort0"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher"
+             symbol_name="ThreadHeader05Ready"
+             underscore_name="thread_header05_ready"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader05ReadyPort0"
+             underscore_name="thread_header05_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread05_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread05ReadyForDispatch"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader05ReadyPort1"
+             underscore_name="thread_header05_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready"
              units="percent"
-             symbol_name="ThreadHeader05Ready"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
     <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 2"
+             symbol_name="ThreadHeader05ReadyPort2"
+             underscore_name="thread_header05_ready_port2"
              description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 2"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header05_ready_port2"
              units="percent"
-             symbol_name="ThreadHeader05ReadyPort2"
-             availability="$DualSubsliceMask 32 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher"
-             description="The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3"
+             symbol_name="ThreadHeader05ReadyPort3"
+             underscore_name="thread_header05_ready_port3"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 3"
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="ps_thread04_ready_for_dispatch"
              units="percent"
-             symbol_name="PSThread04ReadyForDispatch"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 32 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
         <register type="NOA" address="0x00009840" value="0x00000000" />
   </set>
 
   <set name="TDL_3"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="tdl_3"
-       hw_config_guid="b763fa13-834e-4468-bba6-5f0d40db9813"
        chipset="TGL"
        symbol_name="TDL_3"
+       underscore_name="tdl_3"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="b763fa13-834e-4468-bba6-5f0d40db9813"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="GPU_CLOCK 0 READ"
              mdapi_group="GPU"
-             />
-    <counter name="EU Active"
-             description="The percentage of time in which the Execution Units were actively processing."
-             data_type="float"
-             max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
-             units="percent"
-             symbol_name="EuActive"
-             semantic_type="duration"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher"
-             data_type="float"
-             max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready"
-             units="percent"
-             symbol_name="ThreadHeader03Ready"
-             availability="$DualSubsliceMask 8 AND"
-             semantic_type="duration"
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
+             semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 3"
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
              data_type="float"
              max_equation="100"
-             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port3"
              units="percent"
-             symbol_name="ThreadHeader03ReadyPort3"
-             availability="$DualSubsliceMask 8 AND"
              semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
-             />
-    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 0"
-             data_type="float"
-             max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port0"
-             units="percent"
-             symbol_name="ThreadHeader03ReadyPort0"
-             availability="$DualSubsliceMask 8 AND"
-             semantic_type="duration"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 1"
-             data_type="float"
-             max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port1"
-             units="percent"
-             symbol_name="ThreadHeader03ReadyPort1"
-             availability="$DualSubsliceMask 8 AND"
-             semantic_type="duration"
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 2"
-             data_type="float"
-             max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header03_ready_port2"
-             units="percent"
-             symbol_name="ThreadHeader03ReadyPort2"
-             availability="$DualSubsliceMask 8 AND"
-             semantic_type="duration"
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
              description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 3"
+    <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
+             description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port3"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort3"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 2"
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port2"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort2"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 1"
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort1"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
-             mdapi_group="GPU/Thread Dispatcher"
-             mdapi_hw_unit_type="dualsubslice"
+             mdapi_hw_unit_type="gpu"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher"
+             symbol_name="ThreadHeader02Ready"
+             underscore_name="thread_header02_ready"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader04ReadyPort0"
-             availability="$DualSubsliceMask 16 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
-             data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
+    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher"
+             symbol_name="ThreadHeader03Ready"
+             underscore_name="thread_header03_ready"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher"
+             data_type="float"
+             max_equation="100"
+             units="percent"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
-             data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
     <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher"
+             symbol_name="ThreadHeader04Ready"
+             underscore_name="thread_header04_ready"
              description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher"
              data_type="float"
              max_equation="100"
-             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header04_ready"
-             units="percent"
-             symbol_name="ThreadHeader04Ready"
-             availability="$DualSubsliceMask 16 AND"
+             units="percent"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="B 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader02ReadyPort0"
+             underscore_name="thread_header02_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
-             mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
     <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader02ReadyPort1"
+             underscore_name="thread_header02_ready_port1"
              description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port1"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort1"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 0"
+    <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2"
+             symbol_name="ThreadHeader02ReadyPort2"
+             underscore_name="thread_header02_ready_port2"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 2"
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port0"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort0"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
     <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 3"
+             symbol_name="ThreadHeader02ReadyPort3"
+             underscore_name="thread_header02_ready_port3"
              description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 3"
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port3"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort3"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 4 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 2"
+    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader03ReadyPort0"
+             underscore_name="thread_header03_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready_port2"
              units="percent"
-             symbol_name="ThreadHeader02ReadyPort2"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader03ReadyPort1"
+             underscore_name="thread_header03_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 1"
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2"
+             symbol_name="ThreadHeader03ReadyPort2"
+             underscore_name="thread_header03_ready_port2"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 2"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3"
+             symbol_name="ThreadHeader03ReadyPort3"
+             underscore_name="thread_header03_ready_port3"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 3"
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="C 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 8 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0"
+             symbol_name="ThreadHeader04ReadyPort0"
+             underscore_name="thread_header04_ready_port0"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 0"
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
+    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1"
+             symbol_name="ThreadHeader04ReadyPort1"
+             underscore_name="thread_header04_ready_port1"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 1"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 6 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
+             mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
+             mdapi_hw_unit_type="dualsubslice"
              />
-    <counter name="Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher"
-             description="The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher"
+    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2"
+             symbol_name="ThreadHeader04ReadyPort2"
+             underscore_name="thread_header04_ready_port2"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 2"
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="thread_header02_ready"
              units="percent"
-             symbol_name="ThreadHeader02Ready"
-             availability="$DualSubsliceMask 4 AND"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="B 5 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
+             mdapi_group="GPU/Thread Dispatcher"
              mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="dualsubslice"
+             />
+    <counter name="Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3"
+             symbol_name="ThreadHeader04ReadyPort3"
+             underscore_name="thread_header04_ready_port3"
+             description="The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 3"
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="B 4 READ 100 UMUL $GpuCoreClocks FDIV"
+             availability="$DualSubsliceMask 16 AND"
              mdapi_group="GPU/Thread Dispatcher"
+             mdapi_usage_flags="Tier3 Overview Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="dualsubslice"
              />
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />
         <register type="NOA" address="0x00009840" value="0x00000000" />
   </set>
 
   <set name="GpuBusyness"
-       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
-       underscore_name="gpu_busyness"
-       hw_config_guid="cc935a3e-8d96-4b47-bc46-3d84247e9a3a"
        chipset="TGL"
        symbol_name="GpuBusyness"
+       underscore_name="gpu_busyness"
+       mdapi_supported_apis="OGL OGL4 OCL MEDIA IO BB"
+       hw_config_guid="cc935a3e-8d96-4b47-bc46-3d84247e9a3a"
        >
+    <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
+             description="Time elapsed on the GPU during the measurement."
+             data_type="uint64"
+             units="ns"
+             semantic_type="duration"
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
-             units="cycles"
-             symbol_name="GpuCoreClocks"
+             units="cycles"
+             semantic_type="event"
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
+             description="Average GPU Core Frequency in the measurement."
+             data_type="uint64"
+             max_equation="$GpuMaxFrequency"
+             units="hz"
              semantic_type="event"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="GPU Busy"
+             symbol_name="GpuBusy"
+             underscore_name="gpu_busy"
+             description="The percentage of time in which the GPU has been processing GPU commands."
+             data_type="float"
+             max_equation="100"
+             units="percent"
+             semantic_type="duration"
+             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="EU Active"
+             symbol_name="EuActive"
+             underscore_name="eu_active"
              description="The percentage of time in which the Execution Units were actively processing."
              data_type="float"
              max_equation="100"
-             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_active"
              units="percent"
-             symbol_name="EuActive"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             equation="2 A 7 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vebox Ring Busy"
-             description="The percentage of time when vebox command streamer was busy."
+    <counter name="EU Stall"
+             symbol_name="EuStall"
+             underscore_name="eu_stall"
+             description="The percentage of time in which the Execution Units were stalled."
              data_type="float"
              max_equation="100"
-             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vebox_busy"
              units="percent"
-             symbol_name="VeboxBusy"
              semantic_type="duration"
+             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Any Engine Busy"
-             description="The percentage of time when any command streamer was busy."
+    <counter name="EU Thread Occupancy"
+             symbol_name="EuThreadOccupancy"
+             underscore_name="eu_thread_occupancy"
+             description="The percentage of time in which hardware threads occupied EUs."
              data_type="float"
              max_equation="100"
-             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="any_engine_busy"
              units="percent"
-             symbol_name="AnyEngineBusy"
              semantic_type="duration"
+             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array"
+             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GS Threads Dispatched"
-             description="The total number of geometry shader hardware threads dispatched."
+    <counter name="VS Threads Dispatched"
+             symbol_name="VsThreads"
+             underscore_name="vs_threads"
+             description="The total number of vertex shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 5 READ"
-             underscore_name="gs_threads"
              units="threads"
-             symbol_name="GsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 1 READ"
+             mdapi_group="EU Array/Vertex Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Geometry Shader"
              />
-    <counter name="VS Threads Dispatched"
-             description="The total number of vertex shader hardware threads dispatched."
+    <counter name="HS Threads Dispatched"
+             symbol_name="HsThreads"
+             underscore_name="hs_threads"
+             description="The total number of hull shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 1 READ"
-             underscore_name="vs_threads"
              units="threads"
-             symbol_name="VsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 2 READ"
+             mdapi_group="EU Array/Hull Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Vertex Shader"
              />
-    <counter name="FS Threads Dispatched"
-             description="The total number of fragment shader hardware threads dispatched."
+    <counter name="DS Threads Dispatched"
+             symbol_name="DsThreads"
+             underscore_name="ds_threads"
+             description="The total number of domain shader hardware threads dispatched."
              data_type="uint64"
-             equation="A 6 READ"
-             underscore_name="ps_threads"
              units="threads"
-             symbol_name="PsThreads"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="A 3 READ"
+             mdapi_group="EU Array/Domain Shader"
              mdapi_usage_flags="Tier3 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Fragment Shader"
              />
-    <counter name="Compute Ring Busy"
-             description="The percentage of time when render command streamer was busy."
-             data_type="float"
-             max_equation="100"
-             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="compute_busy"
-             units="percent"
-             symbol_name="ComputeBusy"
-             semantic_type="duration"
+    <counter name="GS Threads Dispatched"
+             symbol_name="GsThreads"
+             underscore_name="gs_threads"
+             description="The total number of geometry shader hardware threads dispatched."
+             data_type="uint64"
+             units="threads"
+             semantic_type="event"
+             equation="A 5 READ"
+             mdapi_group="EU Array/Geometry Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Time Elapsed"
-             description="Time elapsed on the GPU during the measurement."
+    <counter name="FS Threads Dispatched"
+             symbol_name="PsThreads"
+             underscore_name="ps_threads"
+             description="The total number of fragment shader hardware threads dispatched."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
-             units="ns"
-             symbol_name="GpuTime"
-             semantic_type="duration"
+             units="threads"
+             semantic_type="event"
+             equation="A 6 READ"
+             mdapi_group="EU Array/Fragment Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="AVG GPU Core Frequency"
-             description="Average GPU Core Frequency in the measurement."
+    <counter name="CS Threads Dispatched"
+             symbol_name="CsThreads"
+             underscore_name="cs_threads"
+             description="The total number of compute shader hardware threads dispatched."
              data_type="uint64"
-             max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
-             units="hz"
-             symbol_name="AvgGpuCoreFrequency"
+             units="threads"
              semantic_type="event"
+             equation="A 4 READ"
+             mdapi_group="EU Array/Compute Shader"
+             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <counter name="Render Ring Busy"
+             symbol_name="RenderBusy"
+             underscore_name="render_busy"
              description="The percentage of time when render command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_busy"
              units="percent"
-             symbol_name="RenderBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 6 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="CS Threads Dispatched"
-             description="The total number of compute shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 4 READ"
-             underscore_name="cs_threads"
-             units="threads"
-             symbol_name="CsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="System Frame Batch"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Compute Shader"
              />
-    <counter name="Posh Ring Busy"
-             description="The percentage of time when posh command streamer was busy."
+    <counter name="Compute Ring Busy"
+             symbol_name="ComputeBusy"
+             underscore_name="compute_busy"
+             description="The percentage of time when compute command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="posh_engine_busy"
              units="percent"
-             symbol_name="PoshEngineBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 3 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="GPU Busy"
-             description="The percentage of time in which the GPU has been processing GPU commands."
+    <counter name="Posh Ring Busy"
+             symbol_name="PoshEngineBusy"
+             underscore_name="posh_engine_busy"
+             description="The percentage of time when posh command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="A 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="gpu_busy"
              units="percent"
-             symbol_name="GpuBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame"
+             equation="C 5 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox1 Ring Busy"
-             description="The percentage of time when Vdbox1 command streamer was busy."
+    <counter name="Blitter Ring Busy"
+             symbol_name="BlitterBusy"
+             underscore_name="blitter_busy"
+             description="The percentage of time when blitter command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox1_busy"
              units="percent"
-             symbol_name="Vdbox1Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU FPU And EM Pipes Active"
-             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
+    <counter name="Vebox Ring Busy"
+             symbol_name="VeboxBusy"
+             underscore_name="vebox_busy"
+             description="The percentage of time when vebox command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_fpu_em_active"
              units="percent"
-             symbol_name="EuFpuEmActive"
              semantic_type="duration"
+             equation="C 7 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
-             mdapi_group="EU Array/Pipes"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Render and compute engines are simultaneously busy"
-             description="The percentage of time when render and compute engines are simultaneously busy"
+    <counter name="Vdbox0 Ring Busy"
+             symbol_name="Vdbox0Busy"
+             underscore_name="vdbox0_busy"
+             description="The percentage of time when Vdbox0 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="render_and_compute_busy"
              units="percent"
-             symbol_name="RenderAndComputeBusy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
+             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="System Frame Batch"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="Vdbox0 Ring Busy"
-             description="The percentage of time when Vdbox0 command streamer was busy."
+    <counter name="Vdbox1 Ring Busy"
+             symbol_name="Vdbox1Busy"
+             underscore_name="vdbox1_busy"
+             description="The percentage of time when Vdbox1 command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="C 1 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="vdbox0_busy"
              units="percent"
-             symbol_name="Vdbox0Busy"
              semantic_type="duration"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
+             equation="C 2 READ 100 UMUL $GpuCoreClocks FDIV"
              mdapi_group="GPU"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="DS Threads Dispatched"
-             description="The total number of domain shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 3 READ"
-             underscore_name="ds_threads"
-             units="threads"
-             symbol_name="DsThreads"
-             semantic_type="event"
+             mdapi_usage_flags="System Frame Batch"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Domain Shader"
              />
-    <counter name="EU Thread Occupancy"
-             description="The percentage of time in which hardware threads occupied EUs."
+    <counter name="Render and compute engines are simultaneously busy"
+             symbol_name="RenderAndComputeBusy"
+             underscore_name="render_and_compute_busy"
+             description="The percentage of time when render and compute engines are simultaneously busy"
              data_type="float"
              max_equation="100"
-             equation="2 8 A 10 READ FMUL $EuThreadsCount FDIV FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_thread_occupancy"
              units="percent"
-             symbol_name="EuThreadOccupancy"
              semantic_type="duration"
+             equation="B 0 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="EU Stall"
-             description="The percentage of time in which the Execution Units were stalled."
+    <counter name="Any Engine Busy"
+             symbol_name="AnyEngineBusy"
+             underscore_name="any_engine_busy"
+             description="The percentage of time when any command streamer was busy."
              data_type="float"
              max_equation="100"
-             equation="2 A 8 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="eu_stall"
              units="percent"
-             symbol_name="EuStall"
              semantic_type="duration"
+             equation="B 1 READ 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="Tier2 Overview System Frame Batch Draw"
-             mdapi_group="EU Array"
-             mdapi_hw_unit_type="gpu"
-             />
-    <counter name="HS Threads Dispatched"
-             description="The total number of hull shader hardware threads dispatched."
-             data_type="uint64"
-             equation="A 2 READ"
-             underscore_name="hs_threads"
-             units="threads"
-             symbol_name="HsThreads"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier3 Frame Batch Draw"
              mdapi_hw_unit_type="gpu"
-             mdapi_group="EU Array/Hull Shader"
              />
-    <counter name="Blitter Ring Busy"
-             description="The percentage of time when blitter command streamer was busy."
+    <counter name="EU FPU And EM Pipes Active"
+             symbol_name="EuFpuEmActive"
+             underscore_name="eu_fpu_em_active"
+             description="The percentage of time in which EU FPU and EM pipelines were actively processing."
              data_type="float"
              max_equation="100"
-             equation="C 4 READ 100 UMUL $GpuCoreClocks FDIV"
-             underscore_name="blitter_busy"
              units="percent"
-             symbol_name="BlitterBusy"
              semantic_type="duration"
+             equation="2 A 9 READ FMUL $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV"
+             mdapi_group="EU Array/Pipes"
+             mdapi_usage_flags="Tier3 Overview System Frame Batch Draw"
              mdapi_supported_apis=""
-             mdapi_usage_flags="System Frame Batch"
-             mdapi_group="GPU"
              mdapi_hw_unit_type="gpu"
              />
     <register_config type="NOA">
   </set>
 
   <set name="Metric set TestOa"
-       mdapi_supported_apis="OGL OCL IO BB"
-       underscore_name="test_oa"
-       hw_config_guid="80a833f0-2504-4321-8894-e9277844ce7b"
        chipset="TGL"
        symbol_name="TestOa"
+       underscore_name="test_oa"
+       mdapi_supported_apis="OGL OCL IO BB"
+       hw_config_guid="80a833f0-2504-4321-8894-e9277844ce7b"
        >
-    <counter name="TestCounter7"
-             description="HW test counter 7. Factor: 0.6666"
-             data_type="uint64"
-             equation="B 7 READ"
-             underscore_name="counter7"
-             units="events"
-             symbol_name="Counter7"
-             semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
-             />
     <counter name="GPU Time Elapsed"
+             symbol_name="GpuTime"
+             underscore_name="gpu_time"
              description="Time elapsed on the GPU during the measurement."
              data_type="uint64"
-             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
-             underscore_name="gpu_time"
              units="ns"
-             symbol_name="GpuTime"
              semantic_type="duration"
-             mdapi_supported_apis=""
+             equation="GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="GPU Core Clocks"
+             symbol_name="GpuCoreClocks"
+             underscore_name="gpu_core_clocks"
              description="The total number of GPU core clocks elapsed during the measurement."
              data_type="uint64"
-             equation="GPU_CLOCK 0 READ"
-             underscore_name="gpu_core_clocks"
              units="cycles"
-             symbol_name="GpuCoreClocks"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="GPU_CLOCK 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Tier1 Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="AVG GPU Core Frequency"
+             symbol_name="AvgGpuCoreFrequency"
+             underscore_name="avg_gpu_core_frequency"
              description="Average GPU Core Frequency in the measurement."
              data_type="uint64"
              max_equation="$GpuMaxFrequency"
-             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
-             underscore_name="avg_gpu_core_frequency"
              units="hz"
-             symbol_name="AvgGpuCoreFrequency"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             equation="$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV"
              mdapi_group="GPU"
+             mdapi_usage_flags="Tier1 Overview System Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
              />
-    <counter name="TestCounter8"
-             description="HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode"
+    <counter name="TestCounter0"
+             symbol_name="Counter0"
+             underscore_name="counter0"
+             description="HW test counter 0. Factor: 0.0"
              data_type="uint64"
-             equation="C 0 READ"
-             underscore_name="counter8"
              units="events"
-             symbol_name="Counter8"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter9 - OAR enable"
-             description="HW test counter 9. Should be equal to 1 in query."
+    <counter name="TestCounter1"
+             symbol_name="Counter1"
+             underscore_name="counter1"
+             description="HW test counter 1. Factor: 1.0"
              data_type="uint64"
-             equation="C 1 READ"
-             underscore_name="counter9"
              units="events"
-             symbol_name="Counter9"
              semantic_type="event"
+             equation="B 1 READ"
+             mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
              mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter2"
+             symbol_name="Counter2"
+             underscore_name="counter2"
+             description="HW test counter 2. Factor: 1.0"
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="B 2 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
+             />
+    <counter name="TestCounter3"
+             symbol_name="Counter3"
+             underscore_name="counter3"
+             description="HW test counter 3. Factor: 0.5"
+             data_type="uint64"
+             units="events"
+             semantic_type="event"
+             equation="B 3 READ"
              mdapi_group="GPU"
+             mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
+             mdapi_hw_unit_type="gpu"
              />
     <counter name="TestCounter4"
+             symbol_name="Counter4"
+             underscore_name="counter4"
              description="HW test counter 4. Factor: 0.3333"
              data_type="uint64"
-             equation="B 4 READ"
-             underscore_name="counter4"
              units="events"
-             symbol_name="Counter4"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 4 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="TestCounter5"
+             symbol_name="Counter5"
+             underscore_name="counter5"
              description="HW test counter 5. Factor: 0.3333"
              data_type="uint64"
-             equation="B 5 READ"
-             underscore_name="counter5"
              units="events"
-             symbol_name="Counter5"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 5 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <counter name="TestCounter6"
+             symbol_name="Counter6"
+             underscore_name="counter6"
              description="HW test counter 6. Factor: 0.16666"
              data_type="uint64"
-             equation="B 6 READ"
-             underscore_name="counter6"
              units="events"
-             symbol_name="Counter6"
              semantic_type="event"
-             mdapi_supported_apis=""
-             mdapi_usage_flags="Frame Batch Draw"
-             mdapi_hw_unit_type="gpu"
+             equation="B 6 READ"
              mdapi_group="GPU"
-             />
-    <counter name="TestCounter3"
-             description="HW test counter 3. Factor: 0.5"
-             data_type="uint64"
-             equation="B 3 READ"
-             underscore_name="counter3"
-             units="events"
-             symbol_name="Counter3"
-             semantic_type="event"
-             mdapi_supported_apis=""
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter0"
-             description="HW test counter 0. Factor: 0.0"
+    <counter name="TestCounter7"
+             symbol_name="Counter7"
+             underscore_name="counter7"
+             description="HW test counter 7. Factor: 0.6666"
              data_type="uint64"
-             equation="B 0 READ"
-             underscore_name="counter0"
              units="events"
-             symbol_name="Counter0"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="B 7 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter1"
-             description="HW test counter 1. Factor: 1.0"
+    <counter name="TestCounter8"
+             symbol_name="Counter8"
+             underscore_name="counter8"
+             description="HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode"
              data_type="uint64"
-             equation="B 1 READ"
-             underscore_name="counter1"
              units="events"
-             symbol_name="Counter1"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 0 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
-    <counter name="TestCounter2"
-             description="HW test counter 2. Factor: 1.0"
+    <counter name="TestCounter9 - OAR enable"
+             symbol_name="Counter9"
+             underscore_name="counter9"
+             description="HW test counter 9. Should be equal to 1 in query."
              data_type="uint64"
-             equation="B 2 READ"
-             underscore_name="counter2"
              units="events"
-             symbol_name="Counter2"
              semantic_type="event"
-             mdapi_supported_apis=""
+             equation="C 1 READ"
+             mdapi_group="GPU"
              mdapi_usage_flags="Frame Batch Draw"
+             mdapi_supported_apis=""
              mdapi_hw_unit_type="gpu"
-             mdapi_group="GPU"
              />
     <register_config type="NOA">
         <register type="NOA" address="0x00000D04" value="0x00000200" />